FreeRDP
Loading...
Searching...
No Matches
prim_YUV_opencl.c
1
21#include <freerdp/config.h>
22
23#include <freerdp/types.h>
24#include <freerdp/primitives.h>
25#include "prim_internal.h"
26
27#if defined(WITH_OPENCL)
28#ifdef __APPLE__
29#include "OpenCL/opencl.h"
30#else
31#include <CL/cl.h>
32#endif
33#include "primitives-opencl-program.h"
34
35#include <freerdp/log.h>
36#define TAG FREERDP_TAG("primitives")
37
38typedef struct
39{
40 BOOL support;
41 cl_platform_id platformId;
42 cl_device_id deviceId;
43 cl_context context;
44 cl_command_queue commandQueue;
45 cl_program program;
46} primitives_opencl_context;
47
48typedef struct
49{
50 primitives_opencl_context* cl;
51 cl_kernel kernel;
52 cl_mem srcObjs[3];
53 cl_mem dstObj;
54 prim_size_t roi;
55 size_t dstStep;
56} primitives_cl_kernel;
57
58static primitives_opencl_context* primitives_get_opencl_context(void);
59
60static void cl_kernel_free(primitives_cl_kernel* kernel)
61{
62 if (!kernel)
63 return;
64
65 if (kernel->dstObj)
66 clReleaseMemObject(kernel->dstObj);
67
68 for (size_t i = 0; i < ARRAYSIZE(kernel->srcObjs); i++)
69 {
70 cl_mem obj = kernel->srcObjs[i];
71 kernel->srcObjs[i] = NULL;
72 if (obj)
73 clReleaseMemObject(obj);
74 }
75
76 if (kernel->kernel)
77 clReleaseKernel(kernel->kernel);
78
79 free(kernel);
80}
81
82static primitives_cl_kernel* cl_kernel_new(const char* kernelName, const prim_size_t* roi)
83{
84 WINPR_ASSERT(kernelName);
85 WINPR_ASSERT(roi);
86
87 primitives_cl_kernel* kernel = calloc(1, sizeof(primitives_cl_kernel));
88 if (!kernel)
89 goto fail;
90
91 kernel->roi = *roi;
92 kernel->cl = primitives_get_opencl_context();
93 if (!kernel->cl)
94 goto fail;
95
96 cl_int ret = CL_INVALID_VALUE;
97 kernel->kernel = clCreateKernel(kernel->cl->program, kernelName, &ret);
98 if (ret != CL_SUCCESS)
99 {
100 WLog_ERR(TAG, "openCL: unable to create kernel %s", kernelName);
101 goto fail;
102 }
103
104 return kernel;
105fail:
106 cl_kernel_free(kernel);
107 return NULL;
108}
109
110static BOOL cl_kernel_set_sources(primitives_cl_kernel* ctx, const BYTE* WINPR_RESTRICT pSrc[3],
111 const UINT32 srcStep[3])
112{
113 const char* sourceNames[] = { "Y", "U", "V" };
114
115 WINPR_ASSERT(ctx);
116 WINPR_ASSERT(pSrc);
117 WINPR_ASSERT(srcStep);
118
119 for (cl_uint i = 0; i < ARRAYSIZE(ctx->srcObjs); i++)
120 {
121 cl_int ret = CL_INVALID_VALUE;
122 const BYTE* csrc = pSrc[i];
123 void* WINPR_RESTRICT src = WINPR_CAST_CONST_PTR_AWAY(csrc, void* WINPR_RESTRICT);
124 ctx->srcObjs[i] = clCreateBuffer(ctx->cl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
125 1ull * srcStep[i] * ctx->roi.height, src, &ret);
126 if (ret != CL_SUCCESS)
127 {
128 WLog_ERR(TAG, "unable to create %sobj", sourceNames[i]);
129 return FALSE;
130 }
131
132 ret = clSetKernelArg(ctx->kernel, i * 2, sizeof(cl_mem), (const void*)&ctx->srcObjs[i]);
133 if (ret != CL_SUCCESS)
134 {
135 WLog_ERR(TAG, "unable to set arg for %sobj", sourceNames[i]);
136 return FALSE;
137 }
138
139 ret = clSetKernelArg(ctx->kernel, i * 2 + 1, sizeof(cl_uint), &srcStep[i]);
140 if (ret != CL_SUCCESS)
141 {
142 WLog_ERR(TAG, "unable to set arg stride for %sobj", sourceNames[i]);
143 return FALSE;
144 }
145 }
146
147 return TRUE;
148}
149
150static BOOL cl_kernel_set_destination(primitives_cl_kernel* ctx, UINT32 dstStep)
151{
152
153 WINPR_ASSERT(ctx);
154
155 ctx->dstStep = dstStep;
156 cl_int ret = CL_INVALID_VALUE;
157 ctx->dstObj = clCreateBuffer(ctx->cl->context, CL_MEM_WRITE_ONLY,
158 1ull * dstStep * ctx->roi.height, NULL, &ret);
159 if (ret != CL_SUCCESS)
160 {
161 WLog_ERR(TAG, "unable to create dest obj");
162 return FALSE;
163 }
164
165 ret = clSetKernelArg(ctx->kernel, 6, sizeof(cl_mem), (const void*)&ctx->dstObj);
166 if (ret != CL_SUCCESS)
167 {
168 WLog_ERR(TAG, "unable to set arg destObj");
169 return FALSE;
170 }
171
172 ret = clSetKernelArg(ctx->kernel, 7, sizeof(cl_uint), &dstStep);
173 if (ret != CL_SUCCESS)
174 {
175 WLog_ERR(TAG, "unable to set arg dstStep");
176 return FALSE;
177 }
178
179 return TRUE;
180}
181
182static BOOL cl_kernel_process(primitives_cl_kernel* ctx, BYTE* pDst)
183{
184 WINPR_ASSERT(ctx);
185 WINPR_ASSERT(pDst);
186
187 size_t indexes[2] = { 0 };
188 indexes[0] = ctx->roi.width;
189 indexes[1] = ctx->roi.height;
190
191 cl_int ret = clEnqueueNDRangeKernel(ctx->cl->commandQueue, ctx->kernel, 2, NULL, indexes, NULL,
192 0, NULL, NULL);
193 if (ret != CL_SUCCESS)
194 {
195 WLog_ERR(TAG, "unable to enqueue call kernel");
196 return FALSE;
197 }
198
199 /* Transfer result to host */
200 ret = clEnqueueReadBuffer(ctx->cl->commandQueue, ctx->dstObj, CL_TRUE, 0,
201 ctx->roi.height * ctx->dstStep, pDst, 0, NULL, NULL);
202 if (ret != CL_SUCCESS)
203 {
204 WLog_ERR(TAG, "unable to read back buffer");
205 return FALSE;
206 }
207
208 return TRUE;
209}
210
211static pstatus_t opencl_YUVToRGB(const char* kernelName, const BYTE* WINPR_RESTRICT pSrc[3],
212 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst, UINT32 dstStep,
213 const prim_size_t* WINPR_RESTRICT roi)
214{
215 pstatus_t res = -1;
216
217 primitives_cl_kernel* ctx = cl_kernel_new(kernelName, roi);
218 if (!ctx)
219 goto fail;
220
221 if (!cl_kernel_set_sources(ctx, pSrc, srcStep))
222 goto fail;
223
224 if (!cl_kernel_set_destination(ctx, dstStep))
225 goto fail;
226
227 if (!cl_kernel_process(ctx, pDst))
228 goto fail;
229
230 res = PRIMITIVES_SUCCESS;
231
232fail:
233 cl_kernel_free(ctx);
234 return res;
235}
236
237static primitives_opencl_context openclContext = { 0 };
238
239static primitives_opencl_context* primitives_get_opencl_context(void)
240{
241 return &openclContext;
242}
243
244static void cl_context_free(primitives_opencl_context* ctx)
245{
246 if (!ctx)
247 return;
248 clReleaseProgram(ctx->program);
249 clReleaseCommandQueue(ctx->commandQueue);
250 clReleaseContext(ctx->context);
251 clReleaseDevice(ctx->deviceId);
252 ctx->support = FALSE;
253}
254
255static pstatus_t primitives_uninit_opencl(void)
256{
257 if (!openclContext.support)
258 return PRIMITIVES_SUCCESS;
259
260 cl_context_free(&openclContext);
261 return PRIMITIVES_SUCCESS;
262}
263
264static BOOL primitives_init_opencl_context(primitives_opencl_context* WINPR_RESTRICT prims)
265{
266 cl_uint ndevices = 0;
267 cl_uint nplatforms = 0;
268 cl_kernel kernel = NULL;
269
270 BOOL gotGPU = FALSE;
271 size_t programLen = 0;
272
273 cl_int ret = clGetPlatformIDs(0, NULL, &nplatforms);
274 if (ret != CL_SUCCESS || nplatforms < 1)
275 return FALSE;
276
277 cl_platform_id* platform_ids = (cl_platform_id*)calloc(nplatforms, sizeof(cl_platform_id));
278 if (!platform_ids)
279 return FALSE;
280
281 ret = clGetPlatformIDs(nplatforms, platform_ids, &nplatforms);
282 if (ret != CL_SUCCESS)
283 {
284 free((void*)platform_ids);
285 return FALSE;
286 }
287
288 for (cl_uint i = 0; (i < nplatforms) && !gotGPU; i++)
289 {
290 cl_device_id device_id = NULL;
291 cl_context context = NULL;
292 char platformName[1000] = { 0 };
293 char deviceName[1000] = { 0 };
294
295 ret = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, sizeof(platformName),
296 platformName, NULL);
297 if (ret != CL_SUCCESS)
298 continue;
299
300 ret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_GPU, 1, &device_id, &ndevices);
301 if (ret != CL_SUCCESS)
302 continue;
303
304 ret = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
305 if (ret != CL_SUCCESS)
306 {
307 WLog_ERR(TAG, "openCL: unable get device name for platform %s", platformName);
308 clReleaseDevice(device_id);
309 continue;
310 }
311
312 context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
313 if (ret != CL_SUCCESS)
314 {
315 WLog_ERR(TAG, "openCL: unable to create context for platform %s, device %s",
316 platformName, deviceName);
317 clReleaseDevice(device_id);
318 continue;
319 }
320
321#if defined(CL_VERSION_2_0)
322 prims->commandQueue = clCreateCommandQueueWithProperties(context, device_id, NULL, &ret);
323#else
324 prims->commandQueue = clCreateCommandQueue(context, device_id, 0, &ret);
325#endif
326 if (ret != CL_SUCCESS)
327 {
328 WLog_ERR(TAG, "openCL: unable to create command queue");
329 clReleaseContext(context);
330 clReleaseDevice(device_id);
331 continue;
332 }
333
334 WLog_INFO(TAG, "openCL: using platform=%s device=%s", platformName, deviceName);
335
336 prims->platformId = platform_ids[i];
337 prims->deviceId = device_id;
338 prims->context = context;
339 gotGPU = TRUE;
340 }
341
342 free((void*)platform_ids);
343
344 if (!gotGPU)
345 {
346 WLog_ERR(TAG, "openCL: no GPU found");
347 return FALSE;
348 }
349
350 programLen = strnlen(openclProgram, sizeof(openclProgram));
351 const char* ptr = openclProgram;
352 prims->program = clCreateProgramWithSource(prims->context, 1, &ptr, &programLen, &ret);
353 if (ret != CL_SUCCESS)
354 {
355 WLog_ERR(TAG, "openCL: unable to create program");
356 goto fail;
357 }
358
359 ret = clBuildProgram(prims->program, 1, &prims->deviceId, NULL, NULL, NULL);
360 if (ret != CL_SUCCESS)
361 {
362 size_t length = 0;
363 char buffer[2048];
364 ret = clGetProgramBuildInfo(prims->program, prims->deviceId, CL_PROGRAM_BUILD_LOG,
365 sizeof(buffer), buffer, &length);
366 if (ret != CL_SUCCESS)
367 {
368 WLog_ERR(TAG,
369 "openCL: building program failed but unable to retrieve buildLog, error=%d",
370 ret);
371 }
372 else
373 {
374 WLog_ERR(TAG, "openCL: unable to build program, errorLog=%s", buffer);
375 }
376 goto fail;
377 }
378
379 kernel = clCreateKernel(prims->program, "yuv420_to_bgra_1b", &ret);
380 if (ret != CL_SUCCESS)
381 {
382 WLog_ERR(TAG, "openCL: unable to create yuv420_to_bgra_1b kernel");
383 goto fail;
384 }
385 clReleaseKernel(kernel);
386
387 prims->support = TRUE;
388 return TRUE;
389
390fail:
391 cl_context_free(prims);
392 return FALSE;
393}
394
395static pstatus_t opencl_YUV420ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
396 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
397 UINT32 dstStep, UINT32 DstFormat,
398 const prim_size_t* WINPR_RESTRICT roi)
399{
400 const char* kernel_name = NULL;
401
402 switch (DstFormat)
403 {
404 case PIXEL_FORMAT_ABGR32:
405 kernel_name = "yuv420_to_abgr_1b";
406 break;
407 case PIXEL_FORMAT_XBGR32:
408 kernel_name = "yuv420_to_xbgr_1b";
409 break;
410 case PIXEL_FORMAT_RGBX32:
411 kernel_name = "yuv420_to_rgba_1b";
412 break;
413 case PIXEL_FORMAT_RGBA32:
414 kernel_name = "yuv420_to_rgbx_1b";
415 break;
416 case PIXEL_FORMAT_BGRA32:
417 kernel_name = "yuv420_to_bgra_1b";
418 break;
419 case PIXEL_FORMAT_BGRX32:
420 kernel_name = "yuv420_to_bgrx_1b";
421 break;
422 case PIXEL_FORMAT_XRGB32:
423 kernel_name = "yuv420_to_xrgb_1b";
424 break;
425 case PIXEL_FORMAT_ARGB32:
426 kernel_name = "yuv420_to_argb_1b";
427 break;
428 default:
429 {
430 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
431 if (!p)
432 return -1;
433 return p->YUV420ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
434 }
435 }
436
437 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
438}
439
440static pstatus_t opencl_YUV444ToRGB_8u_P3AC4R(const BYTE* WINPR_RESTRICT pSrc[3],
441 const UINT32 srcStep[3], BYTE* WINPR_RESTRICT pDst,
442 UINT32 dstStep, UINT32 DstFormat,
443 const prim_size_t* WINPR_RESTRICT roi)
444{
445 const char* kernel_name = NULL;
446
447 switch (DstFormat)
448 {
449 case PIXEL_FORMAT_ABGR32:
450 kernel_name = "yuv444_to_abgr_1b";
451 break;
452 case PIXEL_FORMAT_XBGR32:
453 kernel_name = "yuv444_to_xbgr_1b";
454 break;
455 case PIXEL_FORMAT_RGBX32:
456 kernel_name = "yuv444_to_rgba_1b";
457 break;
458 case PIXEL_FORMAT_RGBA32:
459 kernel_name = "yuv444_to_rgbx_1b";
460 break;
461 case PIXEL_FORMAT_BGRA32:
462 kernel_name = "yuv444_to_bgra_1b";
463 break;
464 case PIXEL_FORMAT_BGRX32:
465 kernel_name = "yuv444_to_bgrx_1b";
466 break;
467 case PIXEL_FORMAT_XRGB32:
468 kernel_name = "yuv444_to_xrgb_1b";
469 break;
470 case PIXEL_FORMAT_ARGB32:
471 kernel_name = "yuv444_to_argb_1b";
472 break;
473 default:
474 {
475 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
476 if (!p)
477 return -1;
478 return p->YUV444ToRGB_8u_P3AC4R(pSrc, srcStep, pDst, dstStep, DstFormat, roi);
479 }
480 }
481
482 return opencl_YUVToRGB(kernel_name, pSrc, srcStep, pDst, dstStep, roi);
483}
484
485BOOL primitives_init_opencl(primitives_t* prims)
486{
487 primitives_t* p = primitives_get_by_type(PRIMITIVES_ONLY_CPU);
488 if (!prims || !p)
489 return FALSE;
490 *prims = *p;
491
492 if (!primitives_init_opencl_context(&openclContext))
493 return FALSE;
494
495 prims->YUV420ToRGB_8u_P3AC4R = opencl_YUV420ToRGB_8u_P3AC4R;
496 prims->YUV444ToRGB_8u_P3AC4R = opencl_YUV444ToRGB_8u_P3AC4R;
497 prims->flags |= PRIM_FLAGS_HAVE_EXTGPU;
498 prims->uninit = primitives_uninit_opencl;
499 return TRUE;
500}
501#endif