H264 encoding with low latency

Hello. We are streaming h264 compressed video (1080p, 30fps) from PX2. We are using two configurations, please see the table below.

Configuration # | Encoder                      | Decoder                | Latency
----------------------------------------------------------------------------------------
              1 | gstreamer(nvmediah264videnc) | gstreamer(vaapidecode) | 90ms
              2 | nvmedia API                  | gstreamer(vaapidecode) | 160ms

We would like to configure the encoding process with the nvmedia API (configuration #2) to reduce the latency as much as possible. Below is our code for creating and configuring the encoder.

void Encoder::CreateEncoder() noexcept
{
    NVM_SURF_FMT_DEFINE_ATTR(surfaceFormatAttributes);
    NVM_SURF_FMT_SET_ATTR_YUV(
        surfaceFormatAttributes, YUV, 420, SEMI_PLANAR, UINT, 8, BL);
    NvMediaSurfaceType surfaceType = NvMediaSurfaceFormatGetType(
        surfaceFormatAttributes, NVM_SURF_FMT_ATTR_MAX);
    if (surfaceType == NvMediaSurfaceType_Unsupported)
        ERROR("NvMediaSurfaceFormatGetType() failed");

    NvMediaSurfAllocAttr surfAllocAttrs[] = {
        { NVM_SURF_ATTR_WIDTH, mWidth },
        { NVM_SURF_ATTR_HEIGHT, mHeight },
        { NVM_SURF_ATTR_CPU_ACCESS, NVM_SURF_ATTR_CPU_ACCESS_UNCACHED }
    };
    const int numSurfAllocAttrs =
        sizeof(surfAllocAttrs) / sizeof(surfAllocAttrs[0]);
    DEBUG("Creating video surface ...");
    mVideoSurface = std::shared_ptr<NvMediaVideoSurface>(
        NvMediaVideoSurfaceCreateNew(
            mDevice.get(), surfaceType, surfAllocAttrs, numSurfAllocAttrs, 0
        ),
        [] (NvMediaVideoSurface * surface) {
            if (surface != nullptr)
            {
                DEBUG("Destroying video surface ...");
                NvMediaVideoSurfaceDestroy(surface);
            }
        });
    if (!mVideoSurface)
        ERROR("NvMediaVideoSurfaceCreateNew() failed");

    NvMediaEncodeInitializeParamsH264 params;
    std::memset(&params, 0, sizeof(NvMediaEncodeInitializeParamsH264));
    params.encodeHeight = mHeight;
    params.encodeWidth = mWidth;
    params.enableLimitedRGB = 0;
    params.frameRateNum = 30;
    params.frameRateDen = 1;
    params.profile = NVMEDIA_ENCODE_PROFILE_BASELINE;
    params.level = NVMEDIA_ENCODE_LEVEL_H264_42;
    params.maxNumRefFrames = 1;
    params.enableExternalMEHints = NVMEDIA_FALSE;
    params.enableSliceEncode = NVMEDIA_TRUE;

    DEBUG("Creating encoder ...");
    mEncoder = std::shared_ptr<NvMediaVideoEncoder>(
        NvMediaVideoEncoderCreate(
            mDevice.get(),
            NVMEDIA_VIDEO_ENCODE_CODEC_H264,
            &params,
            surfaceType,
            0,
            2,
            NVMEDIA_ENCODER_INSTANCE_AUTO),
        [] (NvMediaVideoEncoder * encoder) {
            if (encoder != nullptr)
            {
                DEBUG("Destroying encoder ...");
                NvMediaVideoEncoderDestroy(encoder);
            }
        });
    if (!mEncoder)
        ERROR("NvMediaVideoEncoderCreate() failed");

    NvMediaEncodeConfigH264VUIParams vuiParams;
    std::memset(&vuiParams, 0, sizeof(NvMediaEncodeConfigH264VUIParams));

    NvMediaEncodeRCParams rcParams;
    std::memset(&rcParams, 0, sizeof(NvMediaEncodeRCParams));
    rcParams.numBFrames = 0;
    rcParams.rateControlMode = NVMEDIA_ENCODE_PARAMS_RC_VBR;
    rcParams.params.vbr.averageBitRate = BITRATE;
    rcParams.params.vbr.maxBitRate = BITRATE * 2u;

    NvMediaEncodeConfigH264 config;
    std::memset(&config, 0, sizeof(NvMediaEncodeConfigH264));
    config.gopLength = 15;
    config.rcParams = rcParams;
    config.idrPeriod = 0;
    config.h264VUIParameters = &vuiParams;
    config.quality = NVMEDIA_ENCODE_QUALITY_L0;
    config.features =
        NVMEDIA_ENCODE_CONFIG_H264_ENABLE_RTP_MODE_OUTPUT |
        NVMEDIA_ENCODE_CONFIG_H264_ENABLE_SLICE_LEVEL_OUTPUT |
        NVMEDIA_ENCODE_CONFIG_H264_ENABLE_INTRA_REFRESH;
    config.bdirectMode = NVMEDIA_ENCODE_H264_BDIRECT_MODE_DISABLE;
    config.pocType = NVMEDIA_ENCODE_H264_POC_TYPE_0;
    config.adaptiveTransformMode =
        NVMEDIA_ENCODE_H264_ADAPTIVE_TRANSFORM_DISABLE;
    config.intraRefreshCnt = 5;
    config.intraRefreshPeriod = 15;

    DEBUG("Setting encoder configuration ...");
    NvMediaStatus status = NvMediaVideoEncoderSetConfiguration(
        mEncoder.get(), &config);
    if (status != NVMEDIA_STATUS_OK)
        ERROR("NvMediaVideoEncoderSetConfiguration() failed");
}

SDK in use is 5.0.5.0b. Please advise.

Dear xynkin,
Did you check the Image encoder section in the NvMedia documentation? Also, can you share how you are measuring the latency?

  1. I have read https://docs.nvidia.com/drive/archive/5.0.5.0bL/nvvib_docs/index.html#page/NVIDIA%20DRIVE%20Linux%20SDK%20Development%20Guide%2FMultimedia%2Fmultimedia_guide.html%23wwpID0E0PH0HA and https://docs.nvidia.com/drive/archive/5.0.5.0bL/nvvib_docs/index.html#page/NVIDIA%20DRIVE%20Linux%20SDK%20Development%20Guide%2FMultimedia%2Fnvmedia_nvmvid_enc.html%23 Is there any other documentation for NvMedia encoder? Could you point it out?
  2. The latency was calculated as difference in time between the moments when raw frame was captured (before encoding) and when received frame was decoded. So, it includes encoding, transferring and decoding times. But absolute values are not important in this case, what is significant is the difference in measured latency between #1 and #2 configurations. The only difference in the setup was that in the first case gstreamer with nvmediah264videnc plugin was used, and in the second case – NvMedia API.

Dear xynkin,
Could you please try changing config.pocType from NVMEDIA_ENCODE_H264_POC_TYPE_0 to NVMEDIA_ENCODE_H264_POC_TYPE_2?

Hello. Thank you for the suggestion. I will test it out and let you know the result.

Hello. I have tested config.pocType set to NVMEDIA_ENCODE_H264_POC_TYPE_AUTOSELECT, NVMEDIA_ENCODE_H264_POC_TYPE_0 and NVMEDIA_ENCODE_H264_POC_TYPE_2 several times. It seems to have no noticeable effect on latency (it is still around 160ms in all three cases).

Hello. After changing the decoder from gstreamer to the NVIDIA Video Codec SDK, the latency has decreased. Also, config.pocType does make a difference now. Below are the best settings I’ve found so far; the latency is around 20ms (only as a relative reference to my earlier measurements).

Configuration # | Encoder                      | Decoder                | Latency
----------------------------------------------------------------------------------------
              1 | gstreamer(nvmediah264videnc) | gstreamer(vaapidecode) | 90ms
              2 | nvmedia API                  | gstreamer(vaapidecode) | 160ms
              3 | nvmedia API                  | Video Codec SDK        | 20ms
// Encoder setup: resolve the surface type, allocate the input surface,
// create the H.264 encoder, and apply its configuration. Each failure is
// reported through ERROR_IF().

// Resolve the NvMedia surface type for the requested format attributes
// (YUV, 420, SEMI_PLANAR, UINT, 8, BL).
NVM_SURF_FMT_DEFINE_ATTR(surfaceFormatAttributes);
NVM_SURF_FMT_SET_ATTR_YUV (
    surfaceFormatAttributes, YUV, 420, SEMI_PLANAR, UINT, 8, BL
);
NvMediaSurfaceType surfaceType = NvMediaSurfaceFormatGetType (
    surfaceFormatAttributes, NVM_SURF_FMT_ATTR_MAX
);
ERROR_IF (surfaceType == NvMediaSurfaceType_Unsupported,
    "NvMediaSurfaceFormatGetType() failed"
);

// Allocate the input surface at the encoder resolution.
NvMediaSurfAllocAttr surfAllocAttrs[] = {
    { NVM_SURF_ATTR_WIDTH, mWidth },
    { NVM_SURF_ATTR_HEIGHT, mHeight },
    { NVM_SURF_ATTR_CPU_ACCESS, NVM_SURF_ATTR_CPU_ACCESS_UNCACHED }
};
const uint32_t numSurfAllocAttrs =
    sizeof(surfAllocAttrs)/sizeof(surfAllocAttrs[0]);
DEBUG("Creating video surface ...");
mVideoSurface = std::shared_ptr<NvMediaVideoSurface> (
    NvMediaVideoSurfaceCreateNew (
        mDevice, surfaceType, surfAllocAttrs, numSurfAllocAttrs, 0
    ),
    // The null guard matters: a shared_ptr built with a custom deleter also
    // invokes it when the stored pointer is null.
    [] (NvMediaVideoSurface *surface) {
        if( surface != nullptr ){
            DEBUG("Destroying video surface ...");
            NvMediaVideoSurfaceDestroy(surface);
        }
    });
ERROR_IF (mVideoSurface == nullptr,
    "NvMediaVideoSurfaceCreateNew() failed"
);

// Encoder initialization parameters.
NvMediaEncodeInitializeParamsH264 params;
std::memset(&params, 0, sizeof(NvMediaEncodeInitializeParamsH264));
params.encodeHeight = mHeight;
params.encodeWidth = mWidth;
params.enableLimitedRGB = 0;
// NOTE(review): src_params.framerate is also used as a divisor below, so it
// is assumed to be non-zero -- confirm against the caller.
params.frameRateNum = src_params.framerate;
params.frameRateDen = 1;
// Fixed: the PROFILE/LEVEL auto-select constants were assigned to the wrong
// fields (profile received the LEVEL constant and vice versa).
params.profile = NVMEDIA_ENCODE_PROFILE_AUTOSELECT;
params.level = NVMEDIA_ENCODE_LEVEL_AUTOSELECT;
params.maxNumRefFrames = 1;
params.enableExternalMEHints = NVMEDIA_FALSE;
params.enableSliceEncode = NVMEDIA_TRUE;

DEBUG("Creating encoder ...");
mEncoder = std::shared_ptr<NvMediaVideoEncoder> (
    NvMediaVideoEncoderCreate (
        mDevice,
        NVMEDIA_VIDEO_ENCODE_CODEC_H264,
        &params,
        surfaceType,
        2,
        2,
        NVMEDIA_ENCODER_INSTANCE_AUTO),
    [] (NvMediaVideoEncoder *encoder) {
        if( encoder != nullptr ){
            DEBUG("Destroying encoder ...");
            NvMediaVideoEncoderDestroy(encoder);
        }
    }
);
ERROR_IF(mEncoder == nullptr, "NvMediaVideoEncoderCreate() failed");

// VUI parameters are passed zero-filled.
NvMediaEncodeConfigH264VUIParams vuiParams;
std::memset(&vuiParams, 0, sizeof(NvMediaEncodeConfigH264VUIParams));

// Rate control: CBR, no B-frames, VBV buffer sized to one frame's worth of
// bits (bitrate / frame rate).
NvMediaEncodeRCParams rcParams;
std::memset(&rcParams, 0, sizeof(NvMediaEncodeRCParams));
rcParams.numBFrames = 0;
rcParams.rateControlMode = NVMEDIA_ENCODE_PARAMS_RC_CBR;
// Fixed: with RC_CBR selected the `cbr` union member must be filled. The
// original wrote through `vbr`, whose field layout differs from `cbr`, so
// the VBV buffer size did not land in cbr.vbvBufferSize.
rcParams.params.cbr.averageBitRate = src_params.bitrate;
rcParams.params.cbr.vbvBufferSize =
    src_params.bitrate/params.frameRateNum;

NvMediaEncodeConfigH264 config;
std::memset(&config, 0, sizeof(NvMediaEncodeConfigH264));
config.rcParams = rcParams;
config.h264VUIParameters = &vuiParams;
config.gopLength = NVMEDIA_ENCODE_INFINITE_GOPLENGTH;
config.idrPeriod = NVMEDIA_ENCODE_INFINITE_GOPLENGTH;
config.quality = NVMEDIA_ENCODE_QUALITY_L0;
config.entropyCodingMode =
    NVMEDIA_ENCODE_H264_ENTROPY_CODING_MODE_CABAC;
config.adaptiveTransformMode =
    NVMEDIA_ENCODE_H264_ADAPTIVE_TRANSFORM_ENABLE;
config.bdirectMode = NVMEDIA_ENCODE_H264_BDIRECT_MODE_DISABLE;
config.pocType = NVMEDIA_ENCODE_H264_POC_TYPE_2;
config.repeatSPSPPS = NVMEDIA_ENCODE_SPSPPS_REPEAT_DISABLED;
config.intraRefreshCnt = 4;
config.intraRefreshPeriod = 8;
// "CONSTRANED" is spelled this way in the identifier itself; do not correct.
config.features =
    NVMEDIA_ENCODE_CONFIG_H264_ENABLE_RTP_MODE_OUTPUT |
    NVMEDIA_ENCODE_CONFIG_H264_ENABLE_SLICE_LEVEL_OUTPUT |
    NVMEDIA_ENCODE_CONFIG_H264_ENABLE_INTRA_REFRESH |
    NVMEDIA_ENCODE_CONFIG_H264_ENABLE_CONSTRANED_ENCODING;

DEBUG("Setting encoder configuration ...");
NvMediaStatus status = NvMediaVideoEncoderSetConfiguration (
    mEncoder.get(), &config
);
ERROR_IF (status != NVMEDIA_STATUS_OK,
    "NvMediaVideoEncoderSetConfiguration() failed"
);

And here are the parameters for the NvMediaVideoEncoderFeedFrame call:

// Per-frame picture parameters for NvMediaVideoEncoderFeedFrame: start from
// an all-zero struct and set only the fields below.
NvMediaEncodePicParamsH264 picParams;
memset(&picParams, 0, sizeof(picParams));

// Per-picture flags: OUTPUT_SPSPPS and CONSTRAINED_FRAME.
picParams.encodePicFlags =
    NVMEDIA_ENCODE_PIC_FLAG_OUTPUT_SPSPPS |
    NVMEDIA_ENCODE_PIC_FLAG_CONSTRAINED_FRAME;

// Frame rate expressed as numerator / denominator (30 / 1).
picParams.frameRateDen = 1;
picParams.frameRateNum = 30;