/* api/encoding_8c_source.html */

#include <winpr/assert.h>

#include <winpr/winpr.h>


#include "camera.h"


#define TAG CHANNELS_TAG("rdpecam-video.client")


#if defined(WITH_INPUT_FORMAT_H264)

/*
 * Demux a H264 frame from a MJPG container (H264 data carried in APP4 segments).
 *
 * args:
 *    srcData       - pointer to buffer with h264 muxed in MJPG container
 *    srcSize       - size of srcData in bytes
 *    h264_data     - destination buffer receiving the demuxed h264 data
 *    h264_max_size - maximum size allowed by h264_data buffer
 *
 * Credits:
 *    guvcview    http://guvcview.sourceforge.net
 *    Paulo Assis <pj.assis@gmail.com>
 *
 * see Figure 5 Payload Size in USB_Video_Payload_H.264_1.0.pdf
 * for format details
 *
 * All length fields are read from srcData and are therefore validated before
 * use: every read stays inside [srcData, srcData + srcSize) and every copy is
 * checked against the remaining capacity of h264_data.
 *
 * @return: number of bytes copied to the h264 buffer. 0 is returned when the
 *          input is malformed before any usable data was found. If a 2nd+
 *          APP4 marker is missing, the size copied so far is returned.
 */
static size_t demux_uvcH264(const BYTE* srcData, size_t srcSize, BYTE* h264_data,
                            size_t h264_max_size)
{
  WINPR_ASSERT(h264_data);
  WINPR_ASSERT(srcData);

  if (srcSize < 30)
  {
    WLog_ERR(TAG, "Expected srcSize >= 30, got %" PRIuz, srcSize);
    return 0;
  }

  const uint8_t* const srcEnd = srcData + srcSize;
  const uint8_t* spl = NULL;
  size_t copied = 0; /* bytes written to h264_data so far, never exceeds h264_max_size */

  /* search for 1st APP4 marker
   * (30 = 2 APP4 marker + 2 length + 22 header + 4 payload size)
   */
  for (const uint8_t* sp = srcData; sp < srcEnd - 30; sp++)
  {
    if (sp[0] == 0xFF && sp[1] == 0xE4)
    {
      spl = sp + 2; /* exclude APP4 marker */
      break;
    }
  }

  if (spl == NULL)
  {
    WLog_ERR(TAG, "Expected 1st APP4 marker but none found");
    return 0;
  }

  /* The search loop stops more than 30 bytes before the end of srcData, so the
   * 2 byte segment length and the header length field (header offset 2..3)
   * read below are always inside the buffer.
   */

  /* 1st segment length in big endian
   * includes payload size + header + 6 bytes (2 length + 4 payload size)
   */
  uint16_t length = (uint16_t)((spl[0] << 8) | spl[1]);

  spl += 2; /* header */

  /* header length in little endian at offset 2 */
  const uint16_t header_length = (uint16_t)(spl[2] | (spl[3] << 8));

  /* the header and the 4 byte payload size following it must be readable */
  if ((size_t)header_length + 4 > (size_t)(srcEnd - spl))
  {
    WLog_ERR(TAG, "Header size bigger than srcData buffer");
    return 0;
  }
  spl += header_length;

  /* payload size in little endian */
  uint32_t payload_size = (uint32_t)spl[0] << 0;
  payload_size |= (uint32_t)spl[1] << 8;
  payload_size |= (uint32_t)spl[2] << 16;
  payload_size |= (uint32_t)spl[3] << 24;

  if (payload_size > h264_max_size)
  {
    WLog_ERR(TAG, "Payload size bigger than h264_data buffer");
    return 0;
  }

  spl += 4; /* payload start */

  if (payload_size > (size_t)(srcEnd - spl))
  {
    WLog_ERR(TAG, "Payload size bigger than srcData buffer");
    return 0;
  }
  const uint8_t* const epl = spl + payload_size; /* payload end */

  /* strip header + 6 bytes from the 1st segment length; reject values that
   * would wrap around instead of silently copying a bogus amount of data
   */
  if (length < header_length + 6)
  {
    WLog_ERR(TAG, "1st APP4 length %" PRIu16 " smaller than header size", length);
    return 0;
  }
  length = (uint16_t)(length - (header_length + 6));

  if ((length > (size_t)(srcEnd - spl)) || (length > h264_max_size - copied))
  {
    WLog_ERR(TAG, "1st APP4 segment exceeds srcData or h264_data buffer");
    return 0;
  }

  /* copy 1st segment to h264 buffer */
  memcpy(h264_data + copied, spl, length);
  copied += length;
  spl += length;

  /* copy other segments
   * (spl never passes srcEnd, so the 4 byte marker + length read is in bounds
   * whenever more than 4 bytes remain before the payload end)
   */
  while ((spl < epl) && ((size_t)(epl - spl) > 4))
  {
    if (spl[0] != 0xFF || spl[1] != 0xE4)
    {
      WLog_ERR(TAG, "Expected 2nd+ APP4 marker but none found");
      return copied;
    }

    /* 2nd+ segment length in big endian */
    length = (uint16_t)((spl[2] << 8) | spl[3]);
    if (length < 2)
    {
      WLog_ERR(TAG, "Expected 2nd+ APP4 length >= 2 but have %" PRIu16, length);
      return 0;
    }

    length -= 2;
    spl += 4; /* APP4 marker + length */

    if ((length > (size_t)(srcEnd - spl)) || (length > h264_max_size - copied))
    {
      WLog_ERR(TAG, "2nd+ APP4 segment exceeds srcData or h264_data buffer");
      return 0;
    }

    /* copy segment to h264 buffer */
    memcpy(h264_data + copied, spl, length);
    copied += length;
    spl += length;
  }

  return copied;
}

#endif


/**
 * Look up the maximum H264 bitrate for a given frame height.
 *
 * The table is scanned from the largest height downwards and the first entry
 * whose height is less than or equal to the requested one is used.
 *
 * @param height frame height in pixels
 * @return bitrate in bits per second (table value in kbps multiplied by 1000)
 */
UINT32 h264_get_max_bitrate(UINT32 height)
{
  /* source: https://livekit.io/webrtc/bitrate-guide (webcam streaming)
   *
   * sorted by height in descending order
   */
  static const struct Bitrates
  {
    UINT32 height;
    UINT32 bitrate; /* kbps */
  } bitrates[] = {
    { 1080, 2700 }, { 720, 1250 }, { 480, 700 }, { 360, 400 },
    { 240, 170 },   { 180, 140 },  { 0, 100 },
  };

  const struct Bitrates* const last = bitrates + ARRAYSIZE(bitrates);
  const struct Bitrates* entry = bitrates;

  /* skip every entry that requires a larger height than requested */
  while ((entry != last) && (height < entry->height))
    entry++;

  if (entry == last)
  {
    /* not reachable with the table above: its final entry has height 0 */
    WINPR_ASSERT(FALSE);
    return 0;
  }

  const UINT32 kbps = entry->bitrate;
  WLog_DBG(TAG, "Setting h264 max bitrate: %u kbps", kbps);
  return kbps * 1000;
}


/**
 * Translate a camera media format into the matching libav pixel format.
 *
 * @param ecamFormat camera media format to translate
 * @return the corresponding AVPixelFormat, or AV_PIX_FMT_NONE (after logging
 *         an error) when the format has no mapping here
 */
static enum AVPixelFormat ecamToAVPixFormat(CAM_MEDIA_FORMAT ecamFormat)
{
  enum AVPixelFormat avFormat = AV_PIX_FMT_NONE;

  switch (ecamFormat)
  {
    case CAM_MEDIA_FORMAT_YUY2:
      avFormat = AV_PIX_FMT_YUYV422;
      break;

    case CAM_MEDIA_FORMAT_NV12:
      avFormat = AV_PIX_FMT_NV12;
      break;

    case CAM_MEDIA_FORMAT_I420:
      avFormat = AV_PIX_FMT_YUV420P;
      break;

    case CAM_MEDIA_FORMAT_RGB24:
      avFormat = AV_PIX_FMT_RGB24;
      break;

    case CAM_MEDIA_FORMAT_RGB32:
      avFormat = AV_PIX_FMT_RGB32;
      break;

    default:
      WLog_ERR(TAG, "Unsupported ecamFormat %d", ecamFormat);
      break;
  }

  return avFormat;
}


static BOOL ecam_init_sws_context(CameraDeviceStream* stream, enum AVPixelFormat pixFormat)

{

  WINPR_ASSERT(stream);


  if (stream->sws)

    return TRUE;


  /* replacing deprecated JPEG formats, still produced by decoder */

  switch (pixFormat)

  {

    case AV_PIX_FMT_YUVJ411P:

      pixFormat = AV_PIX_FMT_YUV411P;

      break;


    case AV_PIX_FMT_YUVJ420P:

      pixFormat = AV_PIX_FMT_YUV420P;

      break;


    case AV_PIX_FMT_YUVJ422P:

      pixFormat = AV_PIX_FMT_YUV422P;

      break;


    case AV_PIX_FMT_YUVJ440P:

      pixFormat = AV_PIX_FMT_YUV440P;

      break;


    case AV_PIX_FMT_YUVJ444P:

      pixFormat = AV_PIX_FMT_YUV444P;

      break;


    default:

      break;

  }


  const int width = (int)stream->currMediaType.Width;

  const int height = (int)stream->currMediaType.Height;


  const enum AVPixelFormat outPixFormat =

      h264_context_get_option(stream->h264, H264_CONTEXT_OPTION_HW_ACCEL) ? AV_PIX_FMT_NV12

                                                                          : AV_PIX_FMT_YUV420P;


  stream->sws =

      sws_getContext(width, height, pixFormat, width, height, outPixFormat, 0, NULL, NULL, NULL);

  if (!stream->sws)

  {

    WLog_ERR(TAG, "sws_getContext failed");

    return FALSE;

  }


  return TRUE;

}


/**
 * Produce one H264 frame from a captured camera sample.
 *
 * Depending on the stream input format (and on what was compiled in):
 *  - MJPG_H264: the H264 data is demuxed from the MJPG container into
 *    stream->h264Frame and returned directly, no encoder is involved.
 *  - MJPG: the sample is decoded with the MJPEG decoder first.
 *  - anything else: the sample is treated as a tightly packed raw image.
 * Decoded/raw images are converted with swscale into the encoder's YUV buffers
 * and then compressed with h264_compress.
 *
 * @param stream    camera stream, must not be NULL
 * @param srcData   captured sample
 * @param srcSize   size of srcData in bytes
 * @param ppDstData receives a pointer to the H264 data, must not be NULL
 * @param pDstSize  receives the size of the H264 data, must not be NULL
 * @return TRUE on success, FALSE otherwise
 */
static BOOL ecam_encoder_compress_h264(CameraDeviceStream* stream, const BYTE* srcData,
                                       size_t srcSize, BYTE** ppDstData, size_t* pDstSize)
{
  WINPR_ASSERT(stream);
  WINPR_ASSERT(ppDstData);
  WINPR_ASSERT(pDstSize);

  UINT32 dstSize = 0;
  BYTE* srcSlice[4] = { 0 };
  int srcLineSizes[4] = { 0 };
  BYTE* yuvData[3] = { 0 };
  UINT32 yuvLineSizes[3] = { 0 };
  prim_size_t size = { stream->currMediaType.Width, stream->currMediaType.Height };
  CAM_MEDIA_FORMAT inputFormat = streamInputFormat(stream);
  enum AVPixelFormat pixFormat = AV_PIX_FMT_NONE;

#if defined(WITH_INPUT_FORMAT_H264)
  if (inputFormat == CAM_MEDIA_FORMAT_MJPG_H264)
  {
    const size_t rc =
        demux_uvcH264(srcData, srcSize, stream->h264Frame, stream->h264FrameMaxSize);
    dstSize = WINPR_ASSERTING_INT_CAST(uint32_t, rc);
    *ppDstData = stream->h264Frame;
    *pDstSize = dstSize;
    return dstSize > 0;
  }
  else
#endif

#if defined(WITH_INPUT_FORMAT_MJPG)
      if (inputFormat == CAM_MEDIA_FORMAT_MJPG)
  {
    /* the packet only borrows srcData, it is not modified by the decoder input */
    stream->avInputPkt->data = WINPR_CAST_CONST_PTR_AWAY(srcData, uint8_t*);
    WINPR_ASSERT(srcSize <= INT32_MAX);
    stream->avInputPkt->size = (int)srcSize;

    if (avcodec_send_packet(stream->avContext, stream->avInputPkt) < 0)
    {
      WLog_ERR(TAG, "avcodec_send_packet failed");
      return FALSE;
    }

    if (avcodec_receive_frame(stream->avContext, stream->avOutFrame) < 0)
    {
      WLog_ERR(TAG, "avcodec_receive_frame failed");
      return FALSE;
    }

    /* sws_scale below reads size.width x size.height pixels from the decoded
     * frame, so a smaller decoded image would be read out of bounds */
    if ((stream->avOutFrame->width < (int)size.width) ||
        (stream->avOutFrame->height < (int)size.height))
    {
      WLog_ERR(TAG, "Decoded MJPEG frame %dx%d smaller than expected %ux%u",
               stream->avOutFrame->width, stream->avOutFrame->height, (unsigned)size.width,
               (unsigned)size.height);
      return FALSE;
    }

    for (size_t i = 0; i < 4; i++)
    {
      srcSlice[i] = stream->avOutFrame->data[i];
      srcLineSizes[i] = stream->avOutFrame->linesize[i];
    }

    /* get pixFormat produced by MJPEG decoder */
    pixFormat = stream->avContext->pix_fmt;
  }
  else
#endif
  {
    pixFormat = ecamToAVPixFormat(inputFormat);

    if (av_image_fill_linesizes(srcLineSizes, pixFormat, (int)size.width) < 0)
    {
      WLog_ERR(TAG, "av_image_fill_linesizes failed");
      return FALSE;
    }

    /* the plane pointers set up below cover a full, unpadded image inside
     * srcData, so srcData must hold at least that many bytes */
    const int required =
        av_image_get_buffer_size(pixFormat, (int)size.width, (int)size.height, 1);
    if ((required < 0) || (srcSize < (size_t)required))
    {
      WLog_ERR(TAG, "srcSize %" PRIuz " too small for a %ux%u frame (need %d bytes)", srcSize,
               (unsigned)size.width, (unsigned)size.height, required);
      return FALSE;
    }

    if (av_image_fill_pointers(srcSlice, pixFormat, (int)size.height,
                               WINPR_CAST_CONST_PTR_AWAY(srcData, BYTE*), srcLineSizes) < 0)
    {
      WLog_ERR(TAG, "av_image_fill_pointers failed");
      return FALSE;
    }
  }

  /* get buffers for YUV420P or NV12 */
  if (h264_get_yuv_buffer(stream->h264, 0, size.width, size.height, yuvData, yuvLineSizes) < 0)
    return FALSE;

  /* convert from source format to YUV420P or NV12 */
  if (!ecam_init_sws_context(stream, pixFormat))
    return FALSE;

  const BYTE* cSrcSlice[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] };
  if (sws_scale(stream->sws, cSrcSlice, srcLineSizes, 0, (int)size.height, yuvData,
                (int*)yuvLineSizes) <= 0)
    return FALSE;

  /* encode from YUV420P or NV12 to H264 */
  if (h264_compress(stream->h264, ppDstData, &dstSize) < 0)
    return FALSE;

  *pDstSize = dstSize;

  return TRUE;
}


/**
 * Release everything the H264 encoder path attached to a stream: the swscale
 * context, the MJPEG decoder objects and the demux frame buffer (each only
 * when compiled in) and the H264 context. Every released member is left NULL.
 *
 * @param stream camera stream to clean up, must not be NULL
 */
static void ecam_encoder_context_free_h264(CameraDeviceStream* stream)
{
  WINPR_ASSERT(stream);

  /* pixel format converter */
  if (stream->sws != NULL)
  {
    sws_freeContext(stream->sws);
    stream->sws = NULL;
  }

#if defined(WITH_INPUT_FORMAT_MJPG)
  /* MJPEG decoder: the libav free functions reset the pointers to NULL */
  if (stream->avOutFrame != NULL)
  {
    av_frame_free(&stream->avOutFrame);
  }

  if (stream->avInputPkt != NULL)
  {
    /* detach the data pointer and size before the packet is freed */
    stream->avInputPkt->size = 0;
    stream->avInputPkt->data = NULL;
    av_packet_free(&stream->avInputPkt);
  }

  if (stream->avContext != NULL)
  {
    avcodec_free_context(&stream->avContext);
  }
#endif

#if defined(WITH_INPUT_FORMAT_H264)
  /* demux buffer; free(NULL) is a no-op */
  free(stream->h264Frame);
  stream->h264Frame = NULL;
#endif

  /* H264 encoder */
  if (stream->h264 != NULL)
  {
    h264_context_free(stream->h264);
    stream->h264 = NULL;
  }
}


#if defined(WITH_INPUT_FORMAT_MJPG)

/**
 * Set up the MJPEG decoder of a stream: codec context (opened with the
 * dimensions of the current media type), input packet and output frame.
 *
 * Objects created before a failing step are left attached to the stream; this
 * function does not release them itself.
 *
 * @param stream camera stream, must not be NULL
 * @return TRUE when all decoder objects were created, FALSE otherwise
 */
static BOOL ecam_init_mjpeg_decoder(CameraDeviceStream* stream)
{
  BOOL rc = FALSE;

  WINPR_ASSERT(stream);

  const AVCodec* decoder = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
  if (decoder == NULL)
  {
    WLog_ERR(TAG, "avcodec_find_decoder failed to find MJPEG codec");
    goto out;
  }

  stream->avContext = avcodec_alloc_context3(decoder);
  if (stream->avContext == NULL)
  {
    WLog_ERR(TAG, "avcodec_alloc_context3 failed");
    goto out;
  }

  stream->avContext->width = WINPR_ASSERTING_INT_CAST(int, stream->currMediaType.Width);
  stream->avContext->height = WINPR_ASSERTING_INT_CAST(int, stream->currMediaType.Height);

  /* AV_EF_EXPLODE flag is to abort decoding on minor error detection,
   * return error, so we can skip corrupted frames, if any */
  stream->avContext->err_recognition |= AV_EF_EXPLODE;

  if (avcodec_open2(stream->avContext, decoder, NULL) < 0)
  {
    WLog_ERR(TAG, "avcodec_open2 failed");
    goto out;
  }

  stream->avInputPkt = av_packet_alloc();
  if (stream->avInputPkt == NULL)
  {
    WLog_ERR(TAG, "av_packet_alloc failed");
    goto out;
  }

  stream->avOutFrame = av_frame_alloc();
  if (stream->avOutFrame == NULL)
  {
    WLog_ERR(TAG, "av_frame_alloc failed");
    goto out;
  }

  rc = TRUE;

out:
  return rc;
}

#endif


/**
 * Prepare a stream for producing H264 output.
 *
 * For the MJPG_H264 input format (when compiled in) only a demux buffer of
 * Width * Height bytes is allocated, as no encoder is needed. Otherwise a H264
 * context is created if the stream has none, configured (usage type, frame
 * rate, bitrate, CQP rate control, QP, hardware acceleration request) and
 * reset to the current media type dimensions; for MJPG input the MJPEG decoder
 * is initialized as well.
 *
 * When a step after the H264 context exists fails, everything is released
 * again through ecam_encoder_context_free_h264.
 *
 * @param stream camera stream, must not be NULL
 * @return TRUE on success, FALSE otherwise
 */
static BOOL ecam_encoder_context_init_h264(CameraDeviceStream* stream)
{
  WINPR_ASSERT(stream);

#if defined(WITH_INPUT_FORMAT_H264)
  if (streamInputFormat(stream) == CAM_MEDIA_FORMAT_MJPG_H264)
  {
    stream->h264FrameMaxSize = 1ULL * stream->currMediaType.Width *
                               stream->currMediaType.Height; /* 1 byte per pixel */
    stream->h264Frame = (BYTE*)calloc(stream->h264FrameMaxSize, sizeof(BYTE));
    if (!stream->h264Frame)
    {
      /* do not report success with a buffer the demuxer cannot write to */
      WLog_ERR(TAG, "calloc failed for h264 frame buffer");
      stream->h264FrameMaxSize = 0;
      return FALSE;
    }
    return TRUE; /* encoder not needed */
  }
#endif

  if (!stream->h264)
    stream->h264 = h264_context_new(TRUE);

  if (!stream->h264)
  {
    WLog_ERR(TAG, "h264_context_new failed");
    return FALSE;
  }

  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_USAGETYPE,
                               H264_CAMERA_VIDEO_REAL_TIME))
    goto fail;

  /* the frame rate is numerator / denominator, guard the division */
  if (stream->currMediaType.FrameRateDenominator == 0)
  {
    WLog_ERR(TAG, "Invalid frame rate denominator 0");
    goto fail;
  }

  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_FRAMERATE,
                               stream->currMediaType.FrameRateNumerator /
                                   stream->currMediaType.FrameRateDenominator))
    goto fail;

  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_BITRATE,
                               h264_get_max_bitrate(stream->currMediaType.Height)))
    goto fail;

  /* Using CQP mode for rate control. It produces more comparable quality
   * between VAAPI and software encoding than VBR mode
   */
  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_RATECONTROL,
                               H264_RATECONTROL_CQP))
    goto fail;

  /* Using 26 as CQP value. Lower values will produce better quality but
   * higher bitrate; higher values - lower bitrate but degraded quality
   */
  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_QP, 26))
    goto fail;

  /* Requesting hardware acceleration before calling h264_context_reset */
  if (!h264_context_set_option(stream->h264, H264_CONTEXT_OPTION_HW_ACCEL, TRUE))
    goto fail;

  if (!h264_context_reset(stream->h264, stream->currMediaType.Width,
                          stream->currMediaType.Height))
  {
    WLog_ERR(TAG, "h264_context_reset failed");
    goto fail;
  }

#if defined(WITH_INPUT_FORMAT_MJPG)
  if (streamInputFormat(stream) == CAM_MEDIA_FORMAT_MJPG && !ecam_init_mjpeg_decoder(stream))
    goto fail;
#endif

  return TRUE;

fail:
  ecam_encoder_context_free_h264(stream);
  return FALSE;
}


/**
 * Initialize the encoder matching the output format of a stream.
 * H264 is the only output format handled here.
 *
 * @param stream camera stream
 * @return result of the H264 initialization, or FALSE (after logging an error)
 *         for any other output format
 */
BOOL ecam_encoder_context_init(CameraDeviceStream* stream)
{
  const CAM_MEDIA_FORMAT format = streamOutputFormat(stream);

  if (format == CAM_MEDIA_FORMAT_H264)
    return ecam_encoder_context_init_h264(stream);

  WLog_ERR(TAG, "Unsupported output format %d", format);
  return FALSE;
}


/**
 * Release the encoder resources matching the output format of a stream.
 * H264 is the only output format handled here.
 *
 * @param stream camera stream
 * @return TRUE when the H264 resources were released, FALSE for any other
 *         output format (nothing is released in that case)
 */
BOOL ecam_encoder_context_free(CameraDeviceStream* stream)
{
  const CAM_MEDIA_FORMAT format = streamOutputFormat(stream);

  if (format != CAM_MEDIA_FORMAT_H264)
    return FALSE;

  ecam_encoder_context_free_h264(stream);
  return TRUE;
}


/**
 * Compress one captured sample with the encoder matching the output format of
 * a stream. H264 is the only output format handled here.
 *
 * @param stream    camera stream
 * @param srcData   captured sample
 * @param srcSize   size of srcData in bytes
 * @param ppDstData passed through to the H264 compressor (output data pointer)
 * @param pDstSize  passed through to the H264 compressor (output data size)
 * @return result of the H264 compression, or FALSE (after logging an error)
 *         for any other output format
 */
BOOL ecam_encoder_compress(CameraDeviceStream* stream, const BYTE* srcData, size_t srcSize,
                           BYTE** ppDstData, size_t* pDstSize)
{
  const CAM_MEDIA_FORMAT format = streamOutputFormat(stream);

  if (format == CAM_MEDIA_FORMAT_H264)
    return ecam_encoder_compress_h264(stream, srcData, srcSize, ppDstData, pDstSize);

  WLog_ERR(TAG, "Unsupported output format %d", format);
  return FALSE;
}

/* Cross-references:
 *   CameraDeviceStream - Definition camera.h:98
 *   prim_size_t        - Definition primitives.h:74
 */