20#include <freerdp/config.h> 
   22#include <freerdp/types.h> 
   23#include <freerdp/primitives.h> 
   24#include <winpr/sysinfo.h> 
   26#include "prim_internal.h" 
   27#include "prim_YCoCg.h" 
   29#if defined(NEON_INTRINSICS_ENABLED) 
   /*
    * NEON implementation of the YCoCg -> RGB conversion, parameterised by the
    * position of each output channel.
    *
    * Source pixels are consumed as four consecutive bytes in the order
    * Cg, Co, Y, A. Each row is processed in two phases: a vector loop that
    * handles eight pixels per iteration, then a scalar loop for the remaining
    * (width % 8) pixels.
    *
    * Per pixel, the visible arithmetic is:
    *   Cg' = (int8)(Cg << (shift - 1)),  Co' = (int8)(Co << (shift - 1))
    *   T = Y - Cg',  R = T + Co',  G = Y + Cg',  B = T - Co'
    *
    * Parameters:
    *   pSrc, srcStep   source buffer and its step; srcStep is only used here to
    *                   derive srcPad.
    *   pDst, dstStep   destination buffer and its step; dstStep is only used
    *                   here to derive dstPad.
    *   DstFormat       only used to query the destination bytes per pixel.
    *   width, height   pixels per row and number of rows iterated over.
    *   shift           the chroma bytes are left-shifted by (shift - 1).
    *   bPos, gPos,     index of the B, G, R and A channel within the
    *   rPos, aPos      4-element output vector array (bgrx.val[]).
    *   alpha           not referenced in the visible lines; presumably selects
    *                   between source alpha and 0xFF - TODO confirm.
    *
    * Returns PRIMITIVES_SUCCESS.
    *
    * NOTE(review): this listing carries embedded line numbers and appears to
    * have dropped lines (braces, the declaration of bgrx, the stores to pDst,
    * the use of srcPad/dstPad). Comments below describe only what is visible.
    */
   34static pstatus_t neon_YCoCgToRGB_8u_X(
const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
 
   35                                      BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,
 
   36                                      UINT32 width, UINT32 height, UINT8 shift, BYTE bPos,
 
   37                                      BYTE gPos, BYTE rPos, BYTE aPos, BOOL alpha)
 
   /* Read cursor over the source bytes; advanced by the scalar loop below. */
   40  const BYTE* sptr = pSrc;
 
   41  const DWORD formatSize = FreeRDPGetBytesPerPixel(DstFormat);
 
   /*
    * Left-shift count applied to both chroma bytes.
    * NOTE(review): if shift is 0 this becomes -1, and the scalar `<< cll`
    * below would shift by a negative count, which is undefined in C.
    * Confirm callers never pass shift == 0.
    */
   42  const int8_t cll = shift - 1; 
 
   /* Step minus the bytes consumed per row (4 source bytes per pixel). */
   43  const UINT32 srcPad = srcStep - (width * 4);
 
   /* Step minus the bytes produced per row (formatSize bytes per pixel). */
   44  const UINT32 dstPad = dstStep - (width * formatSize);
 
   /* Pixels per row that do not fill a whole 8-pixel vector. */
   45  const UINT32 pad = width % 8;
 
   /* Constant 0xFF in all eight lanes, used as one of the two alpha sources. */
   46  const uint8x8_t aVal = vdup_n_u8(0xFF);
 
   /* The shift count broadcast to all lanes for vshl_u8. */
   47  const int8x8_t cllv = vdup_n_s8(cll);
 
   49  for (UINT32 y = 0; y < height; y++)
 
   /* Vector phase: eight pixels per iteration, up to the last full group. */
   51    for (UINT32 x = 0; x < width - pad; x += 8)
 
   /*
    * De-interleaving load of 8 pixels (32 bytes): val[0] = Cg bytes,
    * val[1] = Co bytes, val[2] = Y bytes, val[3] = alpha bytes.
    */
   54      const uint8x8x4_t raw = vld4_u8(sptr);
 
   /* The chroma bytes are shifted while still unsigned, then reinterpreted as signed. */
   55      const int8x8_t CgRaw = vreinterpret_s8_u8(vshl_u8(raw.val[0], cllv));
 
   56      const int8x8_t CoRaw = vreinterpret_s8_u8(vshl_u8(raw.val[1], cllv));
 
   /* Widen to 16 bit: chroma is sign-extended, Y is zero-extended. */
   57      const int16x8_t Cg = vmovl_s8(CgRaw);
 
   58      const int16x8_t Co = vmovl_s8(CoRaw);
 
   59      const int16x8_t Y = vreinterpretq_s16_u16(vmovl_u8(raw.val[2])); 
 
   60      const int16x8_t T = vsubq_s16(Y, Cg);
 
   61      const int16x8_t R = vaddq_s16(T, Co);
 
   62      const int16x8_t G = vaddq_s16(Y, Cg);
 
   63      const int16x8_t B = vsubq_s16(T, Co);
 
   /* Narrow back to 8 bit with unsigned saturation and place each channel at its index. */
   65      bgrx.val[bPos] = vqmovun_s16(B);
 
   66      bgrx.val[gPos] = vqmovun_s16(G);
 
   67      bgrx.val[rPos] = vqmovun_s16(R);
 
   /*
    * Two alternative alpha sources: the source alpha bytes or constant 0xFF.
    * The condition choosing between them is not visible in this listing
    * (presumably the `alpha` parameter - TODO confirm).
    */
   70        bgrx.val[aPos] = raw.val[3];
 
   72        bgrx.val[aPos] = aVal;
 
   /* Scalar phase: the remaining (width % 8) pixels, same arithmetic per pixel. */
   79    for (UINT32 x = 0; x < pad; x++)
 
   /* The shift is applied before the cast to INT8; bytes are read in the order Cg, Co, Y. */
   82      const INT16 Cg = (INT16)((INT8)((*sptr++) << cll));
 
   83      const INT16 Co = (INT16)((INT8)((*sptr++) << cll));
 
   84      const INT16 Y = (INT16)(*sptr++); 
 
   85      const INT16 T = Y - Cg;
 
   86      const INT16 R = T + Co;
 
   87      const INT16 G = Y + Cg;
 
   88      const INT16 B = T - Co;
 
  108  return PRIMITIVES_SUCCESS;
 
  /*
   * Entry point for the NEON YCoCg -> RGB conversion.
   *
   * Selects the output channel positions for the requested 32-bit destination
   * format and forwards all other arguments unchanged to
   * neon_YCoCgToRGB_8u_X. The four literal arguments after `shift` are, in
   * order, bPos, gPos, rPos and aPos. The alpha ("A") and padding ("X")
   * variant of each layout pass the same positions.
   *
   * The last return delegates to the generic implementation
   * (generic->YCoCgToRGB_8u_AC4R) with the same arguments.
   *
   * NOTE(review): the switch header and the label guarding the generic
   * fallback are not visible in this listing; presumably the switch is on
   * DstFormat and the fallback is the default branch - TODO confirm.
   */
  111static pstatus_t neon_YCoCgToRGB_8u_AC4R(
const BYTE* WINPR_RESTRICT pSrc, INT32 srcStep,
 
  112                                         BYTE* WINPR_RESTRICT pDst, UINT32 DstFormat, INT32 dstStep,
 
  113                                         UINT32 width, UINT32 height, UINT8 shift, BOOL withAlpha)
 
  /* BGRA32 / BGRX32: bPos = 2, gPos = 1, rPos = 0, aPos = 3. */
  117    case PIXEL_FORMAT_BGRA32:
 
  118      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  119                                  shift, 2, 1, 0, 3, withAlpha);
 
  121    case PIXEL_FORMAT_BGRX32:
 
  122      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  123                                  shift, 2, 1, 0, 3, withAlpha);
 
  /* RGBA32 / RGBX32: bPos = 0, gPos = 1, rPos = 2, aPos = 3. */
  125    case PIXEL_FORMAT_RGBA32:
 
  126      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  127                                  shift, 0, 1, 2, 3, withAlpha);
 
  129    case PIXEL_FORMAT_RGBX32:
 
  130      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  131                                  shift, 0, 1, 2, 3, withAlpha);
 
  /* ARGB32 / XRGB32: bPos = 1, gPos = 2, rPos = 3, aPos = 0. */
  133    case PIXEL_FORMAT_ARGB32:
 
  134      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  135                                  shift, 1, 2, 3, 0, withAlpha);
 
  137    case PIXEL_FORMAT_XRGB32:
 
  138      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  139                                  shift, 1, 2, 3, 0, withAlpha);
 
  /* ABGR32 / XBGR32: bPos = 3, gPos = 2, rPos = 1, aPos = 0. */
  141    case PIXEL_FORMAT_ABGR32:
 
  142      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  143                                  shift, 3, 2, 1, 0, withAlpha);
 
  145    case PIXEL_FORMAT_XBGR32:
 
  146      return neon_YCoCgToRGB_8u_X(pSrc, srcStep, pDst, DstFormat, dstStep, width, height,
 
  147                                  shift, 3, 2, 1, 0, withAlpha);
 
  /* Fallback: hand the call to the generic implementation with the same arguments. */
  150      return generic->YCoCgToRGB_8u_AC4R(pSrc, srcStep, pDst, DstFormat, dstStep, width,
 
  151                                         height, shift, withAlpha);
 
  /*
   * Installs the NEON YCoCg -> RGB routine into the given primitives table.
   *
   * When NEON_INTRINSICS_ENABLED is defined, the generic primitives are
   * fetched into `generic` (used as the fallback by the NEON entry point),
   * a verbose log line is emitted and prims->YCoCgToRGB_8u_AC4R is pointed at
   * neon_YCoCgToRGB_8u_AC4R.
   *
   * The final log statement reports that SIMD/NEON is unavailable.
   * NOTE(review): the #else/#endif separating the two paths are not visible
   * in this listing; presumably the last WLog_VRB is the non-NEON branch and
   * leaves `prims` untouched - TODO confirm.
   */
  157void primitives_init_YCoCg_neon_int(
primitives_t* WINPR_RESTRICT prims)
 
  159#if defined(NEON_INTRINSICS_ENABLED) 
  /* Keep a handle to the generic primitives for formats the NEON path does not cover. */
  160  generic = primitives_get_generic();
 
  162  WLog_VRB(PRIM_TAG, 
"NEON optimizations");
 
  /* Override only the YCoCg conversion entry of the table. */
  163  prims->YCoCgToRGB_8u_AC4R = neon_YCoCgToRGB_8u_AC4R;
 
  165  WLog_VRB(PRIM_TAG, 
"undefined WITH_SIMD or neon intrinsics not available");