nvEncUnlockInputBuffer() causes segfault

I’m looking into using the NVIDIA Video Codec SDK to encode ARGB buffers into H264. Once I get ARGB CPU buffers working I want to look into using DirectX/OpenGL buffers for input, but for now, to get things going, I’m using ARGB buffers. Allocating input buffers and output bitstream buffers, and initializing the encoder, all work fine.

But when I try to fill an input buffer I get a segfault. First I lock the buffer, then fill it with some arbitrary ARGB data, then unlock. But as soon as I call nvEncUnlockInputBuffer() I get a segfault. I have pasted my code here, but I’ll attach it below too.

I’m running Arch Linux with the following versions installed:

Linux arch680 4.16.13-2-ARCH #1 SMP PREEMPT Fri Jun 1 18:46:11 UTC 2018 x86_64 GNU/Linux

01:00.0 VGA compatible controller: NVIDIA Corporation GM204 [GeForce GTX 980] (rev a1) (prog-if 00 [VGA controller])
	Kernel driver in use: nvidia
	Kernel modules: nouveau, nvidia_drm, nvidia

    NVIDIA's GPU programming toolkit
local/libvdpau 1.1.1+3+ga21bf7a-1
    Nvidia VDPAU library
local/libxnvctrl 396.24-1
    NVIDIA NV-CONTROL X extension
local/nvidia 396.24-7
    NVIDIA drivers for linux
local/nvidia-settings 396.24-1
    Tool for configuring the NVIDIA graphics driver
local/nvidia-utils 396.24-2
    NVIDIA drivers utilities
local/opencl-nvidia 396.24-2
    OpenCL implemention for NVIDIA

Any ideas what might cause this segfault? Or thoughts on how to debug this?

My experimental code (same as the link above).

#include <stdio.h>
#include <stdlib.h>
#include <poly/Log.h>
#include <poly/CudaCore.h>
#include <NvEncoder/nvEncodeAPI.h>
#include <dlfcn.h>

/* Bring the `poly` namespace into scope for the rest of this file. */
using namespace poly;

/* Forward declarations of the to-string helpers that are defined after main(). */
static std::string nvenc_guid_to_string( GUID guid);
static std::string nvenc_bufferformat_to_string(uint32_t fmt);

/*
  Experimental NVENC test program. In order it: creates the CUDA
  device and context, opens an NVENC encode session, logs the
  supported codec/preset/profile GUIDs and input formats, initializes
  an H264 encoder at 1280x720, allocates the input and bitstream
  buffers, fetches the SPS/PPS, and finally locks, fills and unlocks
  the first input buffer.

  NOTE(review): as pasted, this listing is missing its closing braces,
  several comment delimiters and a number of declarations (e.g.
  `result`, `enc_ctx`, `encode_cfg`, `guid_preset`, `preset_config`,
  `seq_payload`), so it does not compile as shown.
*/
int main(int argc, char* argv[]) {

  poly_log_init(1024, argc, argv);

  /* get a cuda device that the encoder needs. */
  SX_DEBUG("Creating cuda objects.");
  CudaCore cuda_core;
  if (0 != cuda_core.init()) {
  CudaDevice* cuda_device = nullptr;
  if (0 != cuda_core.createDevice(0, &cuda_device)) {
  CudaContext* cuda_ctx = nullptr;
  if (0 != cuda_core.createContext(cuda_device, 0, &cuda_ctx)) {

  /* init the encode api */
  SX_DEBUG("Creating encoder.");

  result = NvEncodeAPICreateInstance(&enc_ctx);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to create an encoder instance.");

  /* create encoder instance */
  encode_cfg.device = (void*)cuda_ctx->getContextHandle();
  encode_cfg.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
  encode_cfg.apiVersion = NVENCAPI_VERSION;

  void* encoder = nullptr;
  result = enc_ctx.nvEncOpenEncodeSessionEx(&encode_cfg, (void**)&encoder);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to open an encoder session.");
  if (nullptr == encoder) {
    SX_ERROR("encoder is nullptr.");

  /* ------------------------------------------------------------------------------------ */
  /* C A P A B I L I T I E S 
  /* ------------------------------------------------------------------------------------ */
  /* get encode guid, e.g. h264/hevc. */
  uint32_t enc_guid_count = 0;
  result = enc_ctx.nvEncGetEncodeGUIDCount(encoder, &enc_guid_count);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the GUID count.");
  GUID* enc_guids = (GUID*)malloc(enc_guid_count * sizeof(GUID));
  if (nullptr == enc_guids) {
    SX_ERROR("Failed to allocate our (temporary) buffer to store guids.");
  /* @todo we should check if e.g. h264 is supported. */
  uint32_t enc_guid_count_returned = 0;
  result = enc_ctx.nvEncGetEncodeGUIDs(encoder, enc_guids, enc_guid_count, &enc_guid_count_returned);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the encoder GUIDs.");

  SX_DEBUG("Encode GUID count %u and returned %u.", enc_guid_count, enc_guid_count_returned);
  for (uint32_t i = 0; i < enc_guid_count_returned; ++i) {
    SX_DEBUG("Supported encoder GUID: %s", nvenc_guid_to_string(enc_guids[i]).c_str());

  /* @todo currently we hardcoded these GUIDs. we should check if we can use them */
  GUID guid_h264 = NV_ENC_CODEC_H264_GUID;
  uint32_t width = 1280;
  uint32_t height = 720;

  /* get preset guids */
  uint32_t preset_guid_count = 0;
  result = enc_ctx.nvEncGetEncodePresetCount(encoder, guid_h264, &preset_guid_count);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the encoder preset count.");
  SX_DEBUG("Supported presets %u.", preset_guid_count);

  GUID* preset_guids = (GUID*) malloc(preset_guid_count * sizeof(GUID));
  if (nullptr == preset_guids) {
    SX_ERROR("Failed to allocate our preset_guids buffer.");

  uint32_t preset_guid_count_returned = 0;
  result = enc_ctx.nvEncGetEncodePresetGUIDs(encoder, guid_h264, preset_guids, preset_guid_count, &preset_guid_count_returned);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the encode preset guids.");

  for (uint32_t i = 0; i < preset_guid_count_returned; ++i) {
    SX_DEBUG("Supported preset %s", nvenc_guid_to_string(preset_guids[i]).c_str());
  /* get encode profiles. */
  uint32_t profile_guid_count = 0;
  result = enc_ctx.nvEncGetEncodeProfileGUIDCount(encoder, guid_h264, &profile_guid_count);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the encoder profile guid count.");

  GUID* profile_guids = (GUID*)malloc(profile_guid_count * sizeof(GUID));
  if (nullptr == profile_guids) {
    SX_ERROR("Failed to allocate our profile guids buffer.");

  uint32_t profile_guid_count_returned = 0;
  result = enc_ctx.nvEncGetEncodeProfileGUIDs(encoder, guid_h264, profile_guids, profile_guid_count, &profile_guid_count_returned);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get profile GUIDs.");
  SX_DEBUG("Profile GUID count %u and returned %u", profile_guid_count, profile_guid_count_returned);
  for (uint32_t i = 0; i < profile_guid_count_returned; ++i) {
    SX_DEBUG("Supported profile GUID: %s", nvenc_guid_to_string(profile_guids[i]).c_str());

  /* get input formats */
  uint32_t format_guid_count = 0;
  result = enc_ctx.nvEncGetInputFormatCount(encoder, guid_h264, &format_guid_count);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the input format count.");

  NV_ENC_BUFFER_FORMAT* formats = (NV_ENC_BUFFER_FORMAT*)(malloc(format_guid_count * sizeof(NV_ENC_BUFFER_FORMAT)));
  if (nullptr == formats) {
    SX_ERROR("Failed to allocate our array for the supported buffer formats.");

  uint32_t format_guid_count_returned = 0;
  result = enc_ctx.nvEncGetInputFormats(encoder, guid_h264, formats, format_guid_count, &format_guid_count_returned);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the input formats.");
  SX_DEBUG("Supported input formats %u.", format_guid_count);
  for (uint32_t i = 0; i < format_guid_count_returned; ++i) {
    SX_DEBUG("Supported format %s.", nvenc_bufferformat_to_string(formats[i]).c_str());

  /* ------------------------------------------------------------------------------------ */
  /* I N I T I A L I Z E   E N C O D E R
  /* ------------------------------------------------------------------------------------ */

  /* get configs based on our selected preset and codec. */
  result = enc_ctx.nvEncGetEncodePresetConfig(encoder, guid_h264, guid_preset, &preset_config);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the preset config.");

  /* create init params.
     - preset_config.presetCfg  ==> `NV_ENC_CONFIG presetCfg`
     - init_params.encodeConfig ==> `NV_ENC_CONFIG* encodeConfig`
  NV_ENC_INITIALIZE_PARAMS init_params = {};
  init_params.encodeConfig = &preset_config.presetCfg;
  init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
  init_params.encodeGUID = guid_h264;
  init_params.presetGUID = guid_preset;
  init_params.encodeWidth = width;
  init_params.encodeHeight = height;
  init_params.darWidth = width;
  init_params.darHeight = height;
  init_params.frameRateNum = 30;
  init_params.frameRateDen = 1; 
  init_params.enableEncodeAsync = 0;        /* @todo this is something worth looking into. */
  init_params.enablePTD = 1;                /* when we want to supply the input buffers in display order we set this to 1. */
  init_params.reportSliceOffsets = 0;       /* @todo check meaning of this prop. */
  init_params.enableSubFrameWrite= 0;       /* @todo check meaning of this prop. */
  init_params.enableExternalMEHints = 0;    /* @todo */
  init_params.enableMEOnlyMode = 0;         /* @todo */
  init_params.enableWeightedPrediction = 0; /* @todo */
  init_params.maxEncodeWidth = width;
  init_params.maxEncodeHeight = height;

  /* Use average bitrate. Try to use `averageBitRate` and don't exceed `maxBitRate`. */
  init_params.encodeConfig->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
  init_params.encodeConfig->rcParams.averageBitRate = 1000000;
  init_params.encodeConfig->rcParams.maxBitRate = 2000000;
  result = enc_ctx.nvEncInitializeEncoder(encoder, &init_params);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to initialize the encoder.");

     I found this in the NVENCEncoder.cpp from OBS. I'm not sure
     why they use this formula.  The
     `NVENC_VideoEncoder_API_ProgGuide.pdf` describes that you
     should at least allocate (1 + Nb) input/output buffers,
     where `Nb` is the number of B frames between successive P frames.

     Also, it seems that the NVENCEncoder.cpp from OBS is forcing the 
     buffers to be 32-byte aligned. I found this in the forum too

  /* Surface count heuristic: 16 when the frame has at least 8160 16x16 macroblocks, otherwise 32. */
  int num_macroblocks = ((width + 15) / 16) * ((height + 15) / 16);
  int max_surfaces = (num_macroblocks >= 8160) ? 16 : 32;
  NV_ENC_INPUT_PTR* input_buffers = (NV_ENC_INPUT_PTR*) malloc(max_surfaces * sizeof(NV_ENC_INPUT_PTR));
  NV_ENC_OUTPUT_PTR* output_buffers = (NV_ENC_OUTPUT_PTR*) malloc(max_surfaces * sizeof(NV_ENC_OUTPUT_PTR));

  SX_DEBUG("Num surfaces %u", max_surfaces);
  for (int i = 0; i < max_surfaces; ++i) {

    /* input buffer */
    NV_ENC_CREATE_INPUT_BUFFER create_buffer_info = {};
    create_buffer_info.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
    create_buffer_info.width = (width + 31) & ~31;                 /* Buffers need to be 32-byte aligned. */
    create_buffer_info.height = (height + 31) & ~31;               /* Buffers need to be 32-byte aligned. */
    create_buffer_info.memoryHeap = NV_ENC_MEMORY_HEAP_AUTOSELECT; /* this is deprecated, but lets set a sane value. */
    create_buffer_info.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB;
    result = enc_ctx.nvEncCreateInputBuffer(encoder, &create_buffer_info);

    if (NV_ENC_SUCCESS != result) {
      SX_ERROR("Failed to allocate an input buffer.");
    if (nullptr == create_buffer_info.inputBuffer) {
      SX_ERROR("The created input buffer is nullptr.");

    /* output buffer. */
    NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_info = {};
    create_bitstream_info.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
    result = enc_ctx.nvEncCreateBitstreamBuffer(encoder, &create_bitstream_info);
    if (NV_ENC_SUCCESS != result) {
      SX_ERROR("Failed to create output buffer.");
    input_buffers[i] = create_buffer_info.inputBuffer;
    output_buffers[i] = create_bitstream_info.bitstreamBuffer;

  /* get SPS and PPS */
  uint32_t sps_pps_nbytes = 0;
  char sps_pps[NV_MAX_SEQ_HDR_LEN] = {};

  seq_payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
  seq_payload.spsId = 0;
  seq_payload.ppsId = 0;
  seq_payload.inBufferSize = sizeof(sps_pps);
  seq_payload.spsppsBuffer = (void*) sps_pps;
  seq_payload.outSPSPPSPayloadSize = &sps_pps_nbytes;

  result = enc_ctx.nvEncGetSequenceParams(encoder, &seq_payload);
  if (NV_ENC_SUCCESS != result) {
    SX_ERROR("Failed to get the sequence params.");

  SX_VERBOSE("SPS/PPS size: %u", sps_pps_nbytes);

  /* fill a frame with some color. */
  NV_ENC_PIC_PARAMS pic_params = {};
  pic_params.version = NV_ENC_PIC_PARAMS_VER;
  pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
  pic_params.inputBuffer = input_buffers[0];
  pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB;
  pic_params.inputWidth = width;
  pic_params.inputHeight = height;
  pic_params.outputBitstream = output_buffers[0];

  NV_ENC_LOCK_INPUT_BUFFER input_buffer_lock = {};
  input_buffer_lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
  input_buffer_lock.doNotWait = 0; /* do wait ^.^, make `nvEncLockInputBuffer()` blocking. */
  input_buffer_lock.inputBuffer = input_buffers[0];

    //    result = enc_ctx.nvEncLockInputBuffer(encoder, &input_buffer_lock);
    result = enc_ctx.nvEncLockInputBuffer(encoder, &input_buffer_lock);
    if (NV_ENC_SUCCESS != result) {
      SX_ERROR("Failed to lock an input buffer.");

    SX_DEBUG("Buffer pitch: %u", input_buffer_lock.pitch);
    uint8_t* pixels = (uint8_t*) input_buffer_lock.bufferDataPtr;
    /* Write 4 bytes per pixel for width x height pixels; consecutive rows start `pitch` bytes apart. */
    for (int j = 0; j < height; ++j) {
      for (int i = 0; i < width; ++i) {
        int dx = (j * input_buffer_lock.pitch) + i * 4;
        pixels[dx + 0] = 0xFF;
        pixels[dx + 1] = 0x00;
        pixels[dx + 2] = 0x00;
        pixels[dx + 3] = 0xFF;

       This causes a segfault. GDB shows me:
       Thread 1 "test-nvenc-v0-d" received signal SIGSEGV, Segmentation fault.
       0x00007ffff687d268 in ?? () from /usr/lib/libnvcuvid.so.1
       (gdb) bt
       #0  0x00007ffff687d268 in ?? () from /usr/lib/libnvcuvid.so.1
       #1  0x00005555555588d1 in main (argc=3, argv=0x7fffffffe838) at test-nvenc-v0.cpp:357
    /* NOTE(review): this is the call reported as crashing. The reply that follows this
       listing points out that nvEncUnlockInputBuffer() must be given the handle of the
       locked buffer (`input_buffer_lock.inputBuffer`), not a pointer to the
       NV_ENC_LOCK_INPUT_BUFFER struct. The call is left as posted. */
    result = enc_ctx.nvEncUnlockInputBuffer(encoder, &input_buffer_lock);
    if (NV_ENC_SUCCESS != result) {
      SX_ERROR("Failed to unload the input buffer.");
     - free enc_guids
     - free profile_guids
     - free format guids
     - free preset guids
     - free cuda context
     - free cuda device
     - free input_buffers
     - free output_buffers
  return 0;

static bool nvenc_guid_compare(const GUID& a, const GUID& b) {
  return (0 == memcmp((const void*)&a, (const void*)&b, sizeof(b))) ? true : false;

/*
  Maps a GUID onto the name of the matching codec, profile or preset
  constant. Returns "UNKNOWN" when the GUID equals none of the entries
  listed below.
*/
static std::string nvenc_guid_to_string(GUID guid) {

  struct KnownGuid {
    GUID guid;
    const char* name;
  };

  /* Checked from top to bottom; the first entry that compares equal wins. */
  static const KnownGuid known_guids[] = {
    { NV_ENC_CODEC_H264_GUID,                      "NV_ENC_CODEC_H264_GUID" },
    { NV_ENC_CODEC_HEVC_GUID,                      "NV_ENC_CODEC_HEVC_GUID" },
    { NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID,        "NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID" },
    { NV_ENC_H264_PROFILE_BASELINE_GUID,           "NV_ENC_H264_PROFILE_BASELINE_GUID" },
    { NV_ENC_H264_PROFILE_MAIN_GUID,               "NV_ENC_H264_PROFILE_MAIN_GUID" },
    { NV_ENC_H264_PROFILE_HIGH_GUID,               "NV_ENC_H264_PROFILE_HIGH_GUID" },
    { NV_ENC_H264_PROFILE_HIGH_444_GUID,           "NV_ENC_H264_PROFILE_HIGH_444_GUID" },
    { NV_ENC_H264_PROFILE_STEREO_GUID,             "NV_ENC_H264_PROFILE_STEREO_GUID" },
    { NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY, "NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY" },
    { NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID,   "NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID" },
    { NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID,   "NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID" },
    { NV_ENC_HEVC_PROFILE_MAIN_GUID,               "NV_ENC_HEVC_PROFILE_MAIN_GUID" },
    { NV_ENC_HEVC_PROFILE_MAIN10_GUID,             "NV_ENC_HEVC_PROFILE_MAIN10_GUID" },
    { NV_ENC_HEVC_PROFILE_FREXT_GUID,              "NV_ENC_HEVC_PROFILE_FREXT_GUID" },
    { NV_ENC_PRESET_DEFAULT_GUID,                  "NV_ENC_PRESET_DEFAULT_GUID" },
    { NV_ENC_PRESET_HP_GUID,                       "NV_ENC_PRESET_HP_GUID" },
    { NV_ENC_PRESET_HQ_GUID,                       "NV_ENC_PRESET_HQ_GUID" },
    { NV_ENC_PRESET_BD_GUID,                       "NV_ENC_PRESET_BD_GUID" },
    { NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID,      "NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID" },
    { NV_ENC_PRESET_LOW_LATENCY_HQ_GUID,           "NV_ENC_PRESET_LOW_LATENCY_HQ_GUID" },
    { NV_ENC_PRESET_LOW_LATENCY_HP_GUID,           "NV_ENC_PRESET_LOW_LATENCY_HP_GUID" },
    { NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID,         "NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID" },
    { NV_ENC_PRESET_LOSSLESS_HP_GUID,              "NV_ENC_PRESET_LOSSLESS_HP_GUID" },
  };

  for (const KnownGuid& known : known_guids) {
    if (nvenc_guid_compare(guid, known.guid)) {
      return known.name;
    }
  }

  return "UNKNOWN";
}

/*
  Returns the name of the NV_ENC_BUFFER_FORMAT_* constant that equals
  `fmt`, or "UNKNOWN" when `fmt` is none of the formats listed below.
*/
static std::string nvenc_bufferformat_to_string(uint32_t fmt) {
  switch (fmt) {
    case NV_ENC_BUFFER_FORMAT_NV12:          { return "NV_ENC_BUFFER_FORMAT_NV12"; }
    case NV_ENC_BUFFER_FORMAT_YV12:          { return "NV_ENC_BUFFER_FORMAT_YV12"; }
    case NV_ENC_BUFFER_FORMAT_IYUV:          { return "NV_ENC_BUFFER_FORMAT_IYUV"; }
    case NV_ENC_BUFFER_FORMAT_YUV444:        { return "NV_ENC_BUFFER_FORMAT_YUV444"; }
    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:  { return "NV_ENC_BUFFER_FORMAT_YUV420_10BIT"; }
    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT:  { return "NV_ENC_BUFFER_FORMAT_YUV444_10BIT"; }
    case NV_ENC_BUFFER_FORMAT_ARGB:          { return "NV_ENC_BUFFER_FORMAT_ARGB"; }
    case NV_ENC_BUFFER_FORMAT_ARGB10:        { return "NV_ENC_BUFFER_FORMAT_ARGB10"; }
    case NV_ENC_BUFFER_FORMAT_AYUV:          { return "NV_ENC_BUFFER_FORMAT_AYUV"; }
    case NV_ENC_BUFFER_FORMAT_ABGR:          { return "NV_ENC_BUFFER_FORMAT_ABGR"; }
    case NV_ENC_BUFFER_FORMAT_ABGR10:        { return "NV_ENC_BUFFER_FORMAT_ABGR10"; }
    default:                                 { return "UNKNOWN"; }
  }
}

I’m following the “NVENC_VideoEncoder_API_ProgGuide.pdf” from Jan 2018.

Hi diedrick,

We suspect the bug to be in the application. The application passes the NV_ENC_LOCK_INPUT_BUFFER structure to NvEncUnlockInputBuffer(), instead of the handle to the buffer which was locked.
In other words, this line (343) in the code

result = enc_ctx.nvEncUnlockInputBuffer(encoder, &input_buffer_lock);

should be

result = enc_ctx.nvEncUnlockInputBuffer(encoder, input_buffer_lock.inputBuffer);

Please check if this suggestion helps resolve the segfault.

Ryan Park

Thanks rygark. You’re totally right. I was looking at some other code which used nvEncLockInputBuffer() with an NV_ENC_LOCK_INPUT_BUFFER and, without checking the API, I assumed that nvEncUnlockInputBuffer() had the same arguments.