23#include <freerdp/config.h>
25#include <freerdp/types.h>
26#include <freerdp/primitives.h>
27#include <winpr/sysinfo.h>
29#include "prim_alphaComp.h"
31#include "prim_internal.h"
32#include "prim_avxsse.h"
35#if defined(SSE_AVX_INTRINSICS_ENABLED)
/*
 * SSE2 variant of the alphaComp_argb primitive.
 *
 * Blends two images of 32-bit pixels, four pixels per vector step. For each
 * 16-bit-widened channel the visible arithmetic is
 *     dst = src2 + (((a1 + 1) * (src1 - src2)) >> 8)
 * where a1 is the highest byte of the corresponding pSrc1 pixel (presumably
 * the alpha channel of an ARGB pixel -- confirm against the pixel layout).
 *
 * pSrc1/src1Step, pSrc2/src2Step, pDst/dstStep: buffer pointers and row
 * strides; the strides are combined with a byte count below, so they are
 * treated as byte strides.
 * width, height: image size in pixels (the `height` parameter declaration is
 * not visible in this excerpt, only its uses).
 *
 * Returns PRIMITIVES_SUCCESS, or the status of a generic fallback call
 * (NOTE(review): the failure branches are not visible in this excerpt).
 */
41static pstatus_t sse2_alphaComp_argb(
const BYTE* WINPR_RESTRICT pSrc1, UINT32 src1Step,
42 const BYTE* WINPR_RESTRICT pSrc2, UINT32 src2Step,
43 BYTE* WINPR_RESTRICT pDst, UINT32 dstStep, UINT32 width,
/* View both sources as arrays of 32-bit pixels. */
46 const UINT32* sptr1 = (
const UINT32*)pSrc1;
47 const UINT32* sptr2 = (
const UINT32*)pSrc2;
/* An empty image is not an error: nothing to do. */
49 if ((width <= 0) || (height <= 0))
50 return PRIMITIVES_SUCCESS;
/* Whole-image delegation to the generic implementation.
 * NOTE(review): the guarding condition is not visible in this excerpt. */
54 return generic->alphaComp_argb(pSrc1, src1Step, pSrc2, src2Step, pDst, dstStep, width,
58 UINT32* dptr = (UINT32*)pDst;
/* Bytes occupied by the pixels of one row. */
59 const size_t linebytes = width *
sizeof(UINT32);
/* Per-buffer gap between the end of one row's pixels and the start of the
 * next row, expressed in UINT32 elements. */
60 const size_t src1Jump = (src1Step - linebytes) /
sizeof(UINT32);
61 const size_t src2Jump = (src2Step - linebytes) /
sizeof(UINT32);
62 const size_t dstJump = (dstStep - linebytes) /
sizeof(UINT32);
/* xmm0 is used as the zero operand of the unpack instructions below, which
 * widens 8-bit channels to 16-bit lanes (mm_set1_epu32 is a project helper). */
63 __m128i xmm0 = mm_set1_epu32(0);
/* xmm1 holds 1 in every 16-bit lane; it is added to the broadcast alpha. */
64 __m128i xmm1 = _mm_set1_epi16(1);
/* Process the image one row at a time. */
66 for (UINT32 y = 0; y < height; ++y)
/* Pixels still to be processed in the current row. */
68 uint32_t pixels = width;
/* Dispatch on the destination pointer's offset within a 16-byte block.
 * NOTE(review): the case bodies (presumably computing leadIn) are not
 * visible in this excerpt. */
73 switch ((ULONG_PTR)dptr & 0x0f)
/* Handle the leadIn pixels of this row with the generic implementation
 * (a single row, hence height 1). */
101 pstatus_t status = 0;
102 status =
generic->alphaComp_argb((
const BYTE*)sptr1, src1Step, (
const BYTE*)sptr2,
103 src2Step, (BYTE*)dptr, dstStep, leadIn, 1);
104 if (status != PRIMITIVES_SUCCESS)
/* Remove count * 4 pixels from the remainder; `count` is presumably the
 * number of four-pixel vector iterations (its definition is not visible). */
115 pixels -= count << 2;
/* Load one 128-bit block (four 32-bit pixels) from each source. */
126 xmm2 = LOAD_SI128(sptr1);
129 xmm3 = LOAD_SI128(sptr2);
/* ---- Upper two pixels ---- */
/* Widen the upper 8 bytes of each source to 16-bit lanes. */
132 xmm4 = _mm_unpackhi_epi8(xmm2, xmm0);
134 xmm5 = _mm_unpackhi_epi8(xmm3, xmm0);
/* Per-channel difference src1 - src2 (signed, saturating). */
136 xmm6 = _mm_subs_epi16(xmm4, xmm5);
/* Shuffle mask 0xff replicates the 4th 16-bit lane of each pixel into all
 * four of that pixel's lanes, i.e. broadcasts the pixel's highest byte. */
138 xmm4 = _mm_shufflelo_epi16(xmm4, 0xff);
140 xmm4 = _mm_shufflehi_epi16(xmm4, 0xff);
/* broadcast value + 1 */
142 xmm4 = _mm_adds_epi16(xmm4, xmm1);
/* Multiply by the difference, keeping the low 16 bits of each product. */
144 xmm4 = _mm_mullo_epi16(xmm4, xmm6);
/* Arithmetic shift right by 8 (divide by 256). */
146 xmm4 = _mm_srai_epi16(xmm4, 8);
/* Add the src2 channels back in. */
148 xmm4 = _mm_adds_epi16(xmm4, xmm5);
/* ---- Lower two pixels: same sequence on the lower 8 bytes ---- */
151 xmm5 = _mm_unpacklo_epi8(xmm2, xmm0);
153 xmm6 = _mm_unpacklo_epi8(xmm3, xmm0);
155 xmm7 = _mm_subs_epi16(xmm5, xmm6);
157 xmm5 = _mm_shufflelo_epi16(xmm5, 0xff);
159 xmm5 = _mm_shufflehi_epi16(xmm5, 0xff);
161 xmm5 = _mm_adds_epi16(xmm5, xmm1);
163 xmm5 = _mm_mullo_epi16(xmm5, xmm7);
165 xmm5 = _mm_srai_epi16(xmm5, 8);
167 xmm5 = _mm_adds_epi16(xmm5, xmm6);
/* Keep only the low byte of every 16-bit lane so the unsigned-saturating
 * pack below cannot clamp the results. */
170 xmm3 = _mm_set1_epi16(0x00ffU);
171 xmm4 = _mm_and_si128(xmm4, xmm3);
172 xmm5 = _mm_and_si128(xmm5, xmm3);
/* Narrow back to bytes: lower-pixel results go to the low 8 bytes,
 * upper-pixel results to the high 8 bytes, then store four pixels. */
174 xmm5 = _mm_packus_epi16(xmm5, xmm4);
175 STORE_SI128(dptr, xmm5);
/* Finish the pixels left over in this row with the generic implementation. */
182 pstatus_t status = 0;
183 status =
generic->alphaComp_argb((
const BYTE*)sptr1, src1Step, (
const BYTE*)sptr2,
184 src2Step, (BYTE*)dptr, dstStep, pixels, 1);
185 if (status != PRIMITIVES_SUCCESS)
199 return PRIMITIVES_SUCCESS;
/*
 * Registers the SSE implementation of alphaComp_argb in `prims`.
 *
 * When SSE_AVX_INTRINSICS_ENABLED is defined, the file-level `generic`
 * pointer is refreshed from primitives_get_generic() and
 * prims->alphaComp_argb is pointed at sse2_alphaComp_argb.
 * NOTE(review): the second WLog_VRB call presumably belongs to an #else
 * branch that is not visible in this excerpt.
 */
204void primitives_init_alphaComp_sse3_int(
primitives_t* WINPR_RESTRICT prims)
206#if defined(SSE_AVX_INTRINSICS_ENABLED)
/* Fetch the generic primitives used as fallback by the SSE routine. */
207 generic = primitives_get_generic();
208 WLog_VRB(PRIM_TAG,
"SSE2/SSE3 optimizations");
/* Install the optimized implementation. */
209 prims->alphaComp_argb = sse2_alphaComp_argb;
/* Verbose log emitted when the optimized path is not compiled in
 * (presumably; see note above). */
212 WLog_VRB(PRIM_TAG,
"undefined WITH_SIMD or SSE3 intrinsics not available");