/*
 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of
 * Szeged
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef THIRD_PARTY_BLINK_RENDERER_PLATFORM_GRAPHICS_CPU_ARM_WEBGL_IMAGE_CONVERSION_NEON_H_
#define THIRD_PARTY_BLINK_RENDERER_PLATFORM_GRAPHICS_CPU_ARM_WEBGL_IMAGE_CONVERSION_NEON_H_

#include "base/compiler_specific.h"

#if defined(CPU_ARM_NEON)

#include <arm_neon.h>

namespace blink {
namespace simd {
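
// Each helper below converts one row of pixels with NEON intrinsics. On
// return, |source| and |destination| have been advanced past the converted
// data, and |pixels_per_row| holds the number of leftover tail pixels that
// the caller still has to convert (e.g. with the scalar fallback).

// Unpacks little-endian RGBA16 to RGBA8 by keeping the most significant byte
// of each 16-bit component. Handles 4 pixels (16 components) per iteration.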
ALWAYS_INLINE void UnpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source,
                                                     uint8_t*& destination,
                                                     unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 16;
  unsigned components_size = components_per_row - tail_components;
  const uint8_t* src = reinterpret_cast<const uint8_t*>(source);
  for (unsigned i = 0; i < components_size; i += 16) {
    uint8x16x2_t components = vld2q_u8(src + i * 2);
    vst1q_u8(destination + i, components.val[1]);
  }
  source += components_size;
  destination += components_size;
  pixels_per_row = tail_components / 4;
}
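
// Unpacks little-endian RGB16 to RGBA8: keeps the high byte of each 16-bit
// component and appends an opaque 0xFF alpha. Handles 8 pixels (24 source
// components) per iteration.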
ALWAYS_INLINE void UnpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source,
                                                    uint8_t*& destination,
                                                    unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 3;
  unsigned tail_components = components_per_row % 24;
  unsigned components_size = components_per_row - tail_components;
  uint8x8_t component_a = vdup_n_u8(0xFF);
  for (unsigned i = 0; i < components_size; i += 24) {
    uint16x8x3_t rgb16 = vld3q_u16(source + i);
    uint8x8_t component_r = vqmovn_u16(vshrq_n_u16(rgb16.val[0], 8));
    uint8x8_t component_g = vqmovn_u16(vshrq_n_u16(rgb16.val[1], 8));
    uint8x8_t component_b = vqmovn_u16(vshrq_n_u16(rgb16.val[2], 8));
    uint8x8x4_t rgba8 = {{component_r, component_g, component_b, component_a}};
    vst4_u8(destination, rgba8);
    destination += 32;
  }
  source += components_size;
  pixels_per_row = tail_components / 3;
}
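
// Unpacks little-endian ARGB16 to RGBA8: keeps the high byte of each 16-bit
// component and reorders ARGB to RGBA. Handles 8 pixels per iteration.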
ALWAYS_INLINE void UnpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source,
                                                     uint8_t*& destination,
                                                     unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 32;
  unsigned components_size = components_per_row - tail_components;
  for (unsigned i = 0; i < components_size; i += 32) {
    uint16x8x4_t argb16 = vld4q_u16(source + i);
    uint8x8_t component_a = vqmovn_u16(vshrq_n_u16(argb16.val[0], 8));
    uint8x8_t component_r = vqmovn_u16(vshrq_n_u16(argb16.val[1], 8));
    uint8x8_t component_g = vqmovn_u16(vshrq_n_u16(argb16.val[2], 8));
    uint8x8_t component_b = vqmovn_u16(vshrq_n_u16(argb16.val[3], 8));
    uint8x8x4_t rgba8 = {{component_r, component_g, component_b, component_a}};
    vst4_u8(destination + i, rgba8);
  }
  source += components_size;
  destination += components_size;
  pixels_per_row = tail_components / 4;
}
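
// Unpacks little-endian BGRA16 to RGBA8: keeps the high byte of each 16-bit
// component and swaps the blue and red channels. Handles 8 pixels per
// iteration.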
ALWAYS_INLINE void UnpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source,
                                                     uint8_t*& destination,
                                                     unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 32;
  unsigned components_size = components_per_row - tail_components;
  for (unsigned i = 0; i < components_size; i += 32) {
    uint16x8x4_t bgra16 = vld4q_u16(source + i);
    uint8x8_t component_b = vqmovn_u16(vshrq_n_u16(bgra16.val[0], 8));
    uint8x8_t component_g = vqmovn_u16(vshrq_n_u16(bgra16.val[1], 8));
    uint8x8_t component_r = vqmovn_u16(vshrq_n_u16(bgra16.val[2], 8));
    uint8x8_t component_a = vqmovn_u16(vshrq_n_u16(bgra16.val[3], 8));
    uint8x8x4_t rgba8 = {{component_r, component_g, component_b, component_a}};
    vst4_u8(destination + i, rgba8);
  }
  source += components_size;
  destination += components_size;
  pixels_per_row = tail_components / 4;
}
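
// Unpacks packed RGBA4444 to RGBA8 by replicating each 4-bit component into
// both nibbles of the output byte. Handles 8 pixels per iteration.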
ALWAYS_INLINE void UnpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source,
                                                 uint8_t*& destination,
                                                 unsigned& pixels_per_row) {
  unsigned tail_pixels = pixels_per_row % 8;
  unsigned pixel_size = pixels_per_row - tail_pixels;
  uint16x8_t immediate0x0f = vdupq_n_u16(0x0F);
  for (unsigned i = 0; i < pixel_size; i += 8) {
    uint16x8_t eight_pixels = vld1q_u16(source + i);
    uint8x8_t component_r = vqmovn_u16(vshrq_n_u16(eight_pixels, 12));
    uint8x8_t component_g =
        vqmovn_u16(vandq_u16(vshrq_n_u16(eight_pixels, 8), immediate0x0f));
    uint8x8_t component_b =
        vqmovn_u16(vandq_u16(vshrq_n_u16(eight_pixels, 4), immediate0x0f));
    uint8x8_t component_a = vqmovn_u16(vandq_u16(eight_pixels, immediate0x0f));
    component_r = vorr_u8(vshl_n_u8(component_r, 4), component_r);
    component_g = vorr_u8(vshl_n_u8(component_g, 4), component_g);
    component_b = vorr_u8(vshl_n_u8(component_b, 4), component_b);
    component_a = vorr_u8(vshl_n_u8(component_a, 4), component_a);
    uint8x8x4_t dest_components = {
        {component_r, component_g, component_b, component_a}};
    vst4_u8(destination, dest_components);
    destination += 32;
  }
  source += pixel_size;
  pixels_per_row = tail_pixels;
}
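
// Packs RGBA8 into 16-bit RGBA4444, keeping the high nibble of each component
// and writing each 16-bit value in little-endian byte order. Handles 8 pixels
// per iteration.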
ALWAYS_INLINE void PackOneRowOfRGBA8ToUnsignedShort4444(
    const uint8_t*& source,
    uint16_t*& destination,
    unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 32;
  unsigned components_size = components_per_row - tail_components;
  uint8_t* dst = reinterpret_cast<uint8_t*>(destination);
  uint8x8_t immediate0xf0 = vdup_n_u8(0xF0);
  for (unsigned i = 0; i < components_size; i += 32) {
    uint8x8x4_t rgba8 = vld4_u8(source + i);
    uint8x8_t component_r = vand_u8(rgba8.val[0], immediate0xf0);
    uint8x8_t component_g = vshr_n_u8(vand_u8(rgba8.val[1], immediate0xf0), 4);
    uint8x8_t component_b = vand_u8(rgba8.val[2], immediate0xf0);
    uint8x8_t component_a = vshr_n_u8(vand_u8(rgba8.val[3], immediate0xf0), 4);
    uint8x8x2_t rgba4;
    rgba4.val[0] = vorr_u8(component_b, component_a);
    rgba4.val[1] = vorr_u8(component_r, component_g);
    vst2_u8(dst, rgba4);
    dst += 16;
  }
  source += components_size;
  destination += components_size / 4;
  pixels_per_row = tail_components / 4;
}
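
// Unpacks packed RGBA5551 to RGBA8: widens each 5-bit color component to
// 8 bits and maps the 1-bit alpha to 0x00 or 0xFF. Handles 8 pixels per
// iteration.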
ALWAYS_INLINE void UnpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source,
                                                 uint8_t*& destination,
                                                 unsigned& pixels_per_row) {
  unsigned tail_pixels = pixels_per_row % 8;
  unsigned pixel_size = pixels_per_row - tail_pixels;
  uint8x8_t immediate0x7 = vdup_n_u8(0x7);
  uint8x8_t immediate0xff = vdup_n_u8(0xFF);
  uint16x8_t immediate0x1f = vdupq_n_u16(0x1F);
  uint16x8_t immediate0x1 = vdupq_n_u16(0x1);
  for (unsigned i = 0; i < pixel_size; i += 8) {
    uint16x8_t eight_pixels = vld1q_u16(source + i);
    uint8x8_t component_r = vqmovn_u16(vshrq_n_u16(eight_pixels, 11));
    uint8x8_t component_g =
        vqmovn_u16(vandq_u16(vshrq_n_u16(eight_pixels, 6), immediate0x1f));
    uint8x8_t component_b =
        vqmovn_u16(vandq_u16(vshrq_n_u16(eight_pixels, 1), immediate0x1f));
    uint8x8_t component_a = vqmovn_u16(vandq_u16(eight_pixels, immediate0x1));
    component_r =
        vorr_u8(vshl_n_u8(component_r, 3), vand_u8(component_r, immediate0x7));
    component_g =
        vorr_u8(vshl_n_u8(component_g, 3), vand_u8(component_g, immediate0x7));
    component_b =
        vorr_u8(vshl_n_u8(component_b, 3), vand_u8(component_b, immediate0x7));
    component_a = vmul_u8(component_a, immediate0xff);
    uint8x8x4_t dest_components = {
        {component_r, component_g, component_b, component_a}};
    vst4_u8(destination, dest_components);
    destination += 32;
  }
  source += pixel_size;
  pixels_per_row = tail_pixels;
}
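
// Packs RGBA8 into 16-bit RGBA5551, keeping the top 5 bits of each color
// component and the most significant bit of alpha, writing each 16-bit value
// in little-endian byte order. Handles 8 pixels per iteration.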
ALWAYS_INLINE void PackOneRowOfRGBA8ToUnsignedShort5551(
    const uint8_t*& source,
    uint16_t*& destination,
    unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 32;
  unsigned components_size = components_per_row - tail_components;
  uint8_t* dst = reinterpret_cast<uint8_t*>(destination);
  uint8x8_t immediate0xf8 = vdup_n_u8(0xF8);
  uint8x8_t immediate0x18 = vdup_n_u8(0x18);
  for (unsigned i = 0; i < components_size; i += 32) {
    uint8x8x4_t rgba8 = vld4_u8(source + i);
    uint8x8_t component_r = vand_u8(rgba8.val[0], immediate0xf8);
    uint8x8_t component_g3bit = vshr_n_u8(rgba8.val[1], 5);
    uint8x8_t component_g2bit =
        vshl_n_u8(vand_u8(rgba8.val[1], immediate0x18), 3);
    uint8x8_t component_b = vshr_n_u8(vand_u8(rgba8.val[2], immediate0xf8), 2);
    uint8x8_t component_a = vshr_n_u8(rgba8.val[3], 7);
    uint8x8x2_t rgba5551;
    rgba5551.val[0] =
        vorr_u8(vorr_u8(component_g2bit, component_b), component_a);
    rgba5551.val[1] = vorr_u8(component_r, component_g3bit);
    vst2_u8(dst, rgba5551);
    dst += 16;
  }
  source += components_size;
  destination += components_size / 4;
  pixels_per_row = tail_components / 4;
}
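
// Packs RGBA8 into 16-bit RGB565, keeping the top 5 bits of red and blue and
// the top 6 bits of green; alpha is dropped. Each 16-bit value is written in
// little-endian byte order. Handles 8 pixels per iteration.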
ALWAYS_INLINE void PackOneRowOfRGBA8ToUnsignedShort565(
    const uint8_t*& source,
    uint16_t*& destination,
    unsigned& pixels_per_row) {
  unsigned components_per_row = pixels_per_row * 4;
  unsigned tail_components = components_per_row % 32;
  unsigned components_size = components_per_row - tail_components;
  uint8_t* dst = reinterpret_cast<uint8_t*>(destination);
  uint8x8_t immediate0xf8 = vdup_n_u8(0xF8);
  uint8x8_t immediate0x1c = vdup_n_u8(0x1C);
  for (unsigned i = 0; i < components_size; i += 32) {
    uint8x8x4_t rgba8 = vld4_u8(source + i);
    uint8x8_t component_r = vand_u8(rgba8.val[0], immediate0xf8);
    uint8x8_t component_g_left = vshr_n_u8(rgba8.val[1], 5);
    uint8x8_t component_g_right =
        vshl_n_u8(vand_u8(rgba8.val[1], immediate0x1c), 3);
    uint8x8_t component_b = vshr_n_u8(vand_u8(rgba8.val[2], immediate0xf8), 3);
    uint8x8x2_t rgb565;
    rgb565.val[0] = vorr_u8(component_g_right, component_b);
    rgb565.val[1] = vorr_u8(component_r, component_g_left);
    vst2_u8(dst, rgb565);
    dst += 16;
  }
  source += components_size;
  destination += components_size / 4;
  pixels_per_row = tail_components / 4;
}

}  // namespace simd
}  // namespace blink

#endif  // CPU_ARM_NEON

#endif  // THIRD_PARTY_BLINK_RENDERER_PLATFORM_GRAPHICS_CPU_ARM_WEBGL_IMAGE_CONVERSION_NEON_H_