I’m looking into using the NVIDIA Video Codec SDK to encode ARGB buffers into H.264. Once I get ARGB CPU buffers working, I want to look into using DirectX/OpenGL buffers for input, but for now, to get things going, I’m using ARGB CPU buffers. Allocating the input buffers and output bitstream buffers and initializing the encoder all work fine.
But when I try to fill an input buffer I get a segfault. First I lock the buffer, then fill it with some arbitrary ARGB data, then unlock. But as soon as I call nvEncUnlockInputBuffer() I get a segfault. I have pasted my code here, but I’ll attach it below too.
I’m running Arch Linux with the following versions installed:
Linux arch680 4.16.13-2-ARCH #1 SMP PREEMPT Fri Jun 1 18:46:11 UTC 2018 x86_64 GNU/Linux
01:00.0 VGA compatible controller: NVIDIA Corporation GM204 [GeForce GTX 980] (rev a1) (prog-if 00 [VGA controller])
Kernel driver in use: nvidia
Kernel modules: nouveau, nvidia_drm, nvidia
local/cuda 9.2.88.1-2
NVIDIA's GPU programming toolkit
local/libvdpau 1.1.1+3+ga21bf7a-1
Nvidia VDPAU library
local/libxnvctrl 396.24-1
NVIDIA NV-CONTROL X extension
local/nvidia 396.24-7
NVIDIA drivers for linux
local/nvidia-settings 396.24-1
Tool for configuring the NVIDIA graphics driver
local/nvidia-utils 396.24-2
NVIDIA drivers utilities
local/opencl-nvidia 396.24-2
OpenCL implemention for NVIDIA
Any ideas what might cause this segfault? Or thoughts on how to debug this?
My experimental code (same as the link above).
#include <stdio.h>
#include <stdlib.h>
#include <poly/Log.h>
#include <poly/CudaCore.h>
#include <NvEncoder/nvEncodeAPI.h>
#include <dlfcn.h>
using namespace poly;
static std::string nvenc_guid_to_string( GUID guid);
static std::string nvenc_bufferformat_to_string(uint32_t fmt);
int main(int argc, char* argv[]) {
poly_log_init(1024, argc, argv);
poly_log_add_sink_stdout();
SX_VERBOSE("NVENC TEST V0");
/* get a cuda device that the encoder needs. */
SX_DEBUG("Creating cuda objects.");
CudaCore cuda_core;
if (0 != cuda_core.init()) {
exit(EXIT_FAILURE);
}
CudaDevice* cuda_device = nullptr;
if (0 != cuda_core.createDevice(0, &cuda_device)) {
exit(EXIT_FAILURE);
}
CudaContext* cuda_ctx = nullptr;
if (0 != cuda_core.createContext(cuda_device, 0, &cuda_ctx)) {
exit(EXIT_FAILURE);
}
/* init the encode api */
SX_DEBUG("Creating encoder.");
NV_ENCODE_API_FUNCTION_LIST enc_ctx = { NV_ENCODE_API_FUNCTION_LIST_VER };
NVENCSTATUS result = NV_ENC_SUCCESS;
result = NvEncodeAPICreateInstance(&enc_ctx);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to create an encoder instance.");
exit(EXIT_FAILURE);
}
/* create encoder instance */
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_cfg = { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER };
encode_cfg.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
encode_cfg.device = (void*)cuda_ctx->getContextHandle();
encode_cfg.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
encode_cfg.apiVersion = NVENCAPI_VERSION;
void* encoder = nullptr;
result = enc_ctx.nvEncOpenEncodeSessionEx(&encode_cfg, (void**)&encoder);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to open an encoder session.");
exit(EXIT_FAILURE);
}
if (nullptr == encoder) {
SX_ERROR("encoder is nullptr.");
exit(EXIT_FAILURE);
}
/* ------------------------------------------------------------------------------------ */
/* C A P A B I L I T I E S
/* ------------------------------------------------------------------------------------ */
/* get encode guid, e.g. h264/hevc. */
uint32_t enc_guid_count = 0;
result = enc_ctx.nvEncGetEncodeGUIDCount(encoder, &enc_guid_count);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the GUID count.");
exit(EXIT_FAILURE);
}
GUID* enc_guids = (GUID*)malloc(enc_guid_count * sizeof(GUID));
if (nullptr == enc_guids) {
SX_ERROR("Failed to allocate our (temporary) buffer to store guids.");
exit(EXIT_FAILURE);
}
/* @todo we should check if e.g. h264 is supported. */
uint32_t enc_guid_count_returned = 0;
result = enc_ctx.nvEncGetEncodeGUIDs(encoder, enc_guids, enc_guid_count, &enc_guid_count_returned);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the encoder GUIDs.");
exit(EXIT_FAILURE);
}
SX_DEBUG("Encode GUID count %u and returned %u.", enc_guid_count, enc_guid_count_returned);
for (uint32_t i = 0; i < enc_guid_count_returned; ++i) {
SX_DEBUG("Supported encoder GUID: %s", nvenc_guid_to_string(enc_guids[i]).c_str());
}
/* @todo currently we hardcoded these GUIDs. we should check if we can use them */
GUID guid_h264 = NV_ENC_CODEC_H264_GUID;
GUID guid_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
uint32_t width = 1280;
uint32_t height = 720;
/* get preset guids */
uint32_t preset_guid_count = 0;
result = enc_ctx.nvEncGetEncodePresetCount(encoder, guid_h264, &preset_guid_count);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the encoder preset count.");
exit(EXIT_FAILURE);
}
SX_DEBUG("Supported presets %u.", preset_guid_count);
GUID* preset_guids = (GUID*) malloc(preset_guid_count * sizeof(GUID));
if (nullptr == preset_guids) {
SX_ERROR("Failed to allocate our preset_guids buffer.");
exit(EXIT_FAILURE);
}
uint32_t preset_guid_count_returned = 0;
result = enc_ctx.nvEncGetEncodePresetGUIDs(encoder, guid_h264, preset_guids, preset_guid_count, &preset_guid_count_returned);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the encode preset guids.");
exit(EXIT_FAILURE);
}
for (uint32_t i = 0; i < preset_guid_count_returned; ++i) {
SX_DEBUG("Supported preset %s", nvenc_guid_to_string(preset_guids[i]).c_str());
}
/* get encode profiles. */
uint32_t profile_guid_count = 0;
result = enc_ctx.nvEncGetEncodeProfileGUIDCount(encoder, guid_h264, &profile_guid_count);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the encoder profile guid count.");
exit(EXIT_FAILURE);
}
GUID* profile_guids = (GUID*)malloc(profile_guid_count * sizeof(GUID));
if (nullptr == profile_guids) {
SX_ERROR("Failed to allocate our profile guids buffer.");
exit(EXIT_FAILURE);
}
uint32_t profile_guid_count_returned = 0;
result = enc_ctx.nvEncGetEncodeProfileGUIDs(encoder, guid_h264, profile_guids, profile_guid_count, &profile_guid_count_returned);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get profile GUIDs.");
exit(EXIT_FAILURE);
}
SX_DEBUG("Profile GUID count %u and returned %u", profile_guid_count, profile_guid_count_returned);
for (uint32_t i = 0; i < profile_guid_count_returned; ++i) {
SX_DEBUG("Supported profile GUID: %s", nvenc_guid_to_string(profile_guids[i]).c_str());
}
/* get input formats */
uint32_t format_guid_count = 0;
result = enc_ctx.nvEncGetInputFormatCount(encoder, guid_h264, &format_guid_count);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the input format count.");
exit(EXIT_FAILURE);
}
NV_ENC_BUFFER_FORMAT* formats = (NV_ENC_BUFFER_FORMAT*)(malloc(format_guid_count * sizeof(NV_ENC_BUFFER_FORMAT)));
if (nullptr == formats) {
SX_ERROR("Failed to allocate our array for the supported buffer formats.");
exit(EXIT_FAILURE);
}
uint32_t format_guid_count_returned = 0;
result = enc_ctx.nvEncGetInputFormats(encoder, guid_h264, formats, format_guid_count, &format_guid_count_returned);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the input formats.");
exit(EXIT_FAILURE);
}
SX_DEBUG("Supported input formats %u.", format_guid_count);
for (uint32_t i = 0; i < format_guid_count_returned; ++i) {
SX_DEBUG("Supported format %s.", nvenc_bufferformat_to_string(formats[i]).c_str());
}
/* ------------------------------------------------------------------------------------ */
/* I N I T I A L I Z E E N C O D E R
/* ------------------------------------------------------------------------------------ */
/* get configs based on our selected preset and codec. */
NV_ENC_PRESET_CONFIG preset_config = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
result = enc_ctx.nvEncGetEncodePresetConfig(encoder, guid_h264, guid_preset, &preset_config);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the preset config.");
exit(EXIT_FAILURE);
}
/* create init params.
- preset_config.presetCfg ==> `NV_ENC_CONFIG presetCfg`
- init_params.encodeConfig ==> `NV_ENC_CONFIG* encodeConfig`
*/
NV_ENC_INITIALIZE_PARAMS init_params = {};
init_params.encodeConfig = &preset_config.presetCfg;
init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
init_params.encodeGUID = guid_h264;
init_params.presetGUID = guid_preset;
init_params.encodeWidth = width;
init_params.encodeHeight = height;
init_params.darWidth = width;
init_params.darHeight = height;
init_params.frameRateNum = 30;
init_params.frameRateDen = 1;
init_params.enableEncodeAsync = 0; /* @todo this is something worth looking into. */
init_params.enablePTD = 1; /* when we want to supply the input buffers in display order we set this to 1. */
init_params.reportSliceOffsets = 0; /* @todo check meaning of this prop. */
init_params.enableSubFrameWrite= 0; /* @todo check meaning of this prop. */
init_params.enableExternalMEHints = 0; /* @todo */
init_params.enableMEOnlyMode = 0; /* @todo */
init_params.enableWeightedPrediction = 0; /* @todo */
init_params.maxEncodeWidth = width;
init_params.maxEncodeHeight = height;
/* Use average bitrate. Try to use `averageBitRate` and don't exceed `maxBitRate`. */
init_params.encodeConfig->rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
init_params.encodeConfig->rcParams.averageBitRate = 1000000;
init_params.encodeConfig->rcParams.maxBitRate = 2000000;
result = enc_ctx.nvEncInitializeEncoder(encoder, &init_params);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to initialize the encoder.");
exit(EXIT_FAILURE);
}
/*
I found this in the NVENCEncoder.cpp from OBS. I'm not sure
why they use this formula. The
`NVENC_VideoEncoder_API_ProgGuide.pdf` describes that you
should at least allocate (1 + Nb) input/output buffers,
where `Nb` is the number of B frames between successive P frames.
Also, it seems that the NVENCEncoder.cpp from OBS is forcing the
buffers to be 32-byte aligned. I found this in the forum too
https://devtalk.nvidia.com/default/topic/811878/?comment=4683617
*/
int num_macroblocks = ((width + 15) / 16) * ((height + 15) / 16);
int max_surfaces = (num_macroblocks >= 8160) ? 16 : 32;
NV_ENC_INPUT_PTR* input_buffers = (NV_ENC_INPUT_PTR*) malloc(max_surfaces * sizeof(NV_ENC_INPUT_PTR));
NV_ENC_OUTPUT_PTR* output_buffers = (NV_ENC_OUTPUT_PTR*) malloc(max_surfaces * sizeof(NV_ENC_OUTPUT_PTR));
SX_DEBUG("Num surfaces %u", max_surfaces);
for (int i = 0; i < max_surfaces; ++i) {
/* input buffer */
NV_ENC_CREATE_INPUT_BUFFER create_buffer_info = {};
create_buffer_info.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
create_buffer_info.width = (width + 31) & ~31; /* Buffers need to be 32-byte aligned. */
create_buffer_info.height = (height + 31) & ~31; /* Buffers need to be 32-byte aligned. */
create_buffer_info.memoryHeap = NV_ENC_MEMORY_HEAP_AUTOSELECT; /* this is deprecated, but lets set a sane value. */
create_buffer_info.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB;
result = enc_ctx.nvEncCreateInputBuffer(encoder, &create_buffer_info);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to allocate an input buffer.");
exit(EXIT_FAILURE);
}
if (nullptr == create_buffer_info.inputBuffer) {
SX_ERROR("The created input buffer is nullptr.");
exit(EXIT_FAILURE);
}
/* output buffer. */
NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_info = {};
create_bitstream_info.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
result = enc_ctx.nvEncCreateBitstreamBuffer(encoder, &create_bitstream_info);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to create output buffer.");
exit(EXIT_FAILURE);
}
input_buffers[i] = create_buffer_info.inputBuffer;
output_buffers[i] = create_bitstream_info.bitstreamBuffer;
}
/* get SPS and PPS */
uint32_t sps_pps_nbytes = 0;
char sps_pps[NV_MAX_SEQ_HDR_LEN] = {};
NV_ENC_SEQUENCE_PARAM_PAYLOAD seq_payload = {};
seq_payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
seq_payload.spsId = 0;
seq_payload.ppsId = 0;
seq_payload.inBufferSize = sizeof(sps_pps);
seq_payload.spsppsBuffer = (void*) sps_pps;
seq_payload.outSPSPPSPayloadSize = &sps_pps_nbytes;
result = enc_ctx.nvEncGetSequenceParams(encoder, &seq_payload);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to get the sequence params.");
exit(EXIT_FAILURE);
}
SX_VERBOSE("SPS/PPS size: %u", sps_pps_nbytes);
/* fill a frame with some color. */
NV_ENC_PIC_PARAMS pic_params = {};
pic_params.version = NV_ENC_PIC_PARAMS_VER;
pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
pic_params.inputBuffer = input_buffers[0];
pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT_ARGB;
pic_params.inputWidth = width;
pic_params.inputHeight = height;
pic_params.outputBitstream = output_buffers[0];
NV_ENC_LOCK_INPUT_BUFFER input_buffer_lock = {};
input_buffer_lock.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
input_buffer_lock.doNotWait = 0; /* do wait ^.^, make `nvEncLockInputBuffer()` blocking. */
input_buffer_lock.inputBuffer = input_buffers[0];
{
// result = enc_ctx.nvEncLockInputBuffer(encoder, &input_buffer_lock);
result = enc_ctx.nvEncLockInputBuffer(encoder, &input_buffer_lock);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to lock an input buffer.");
exit(EXIT_FAILURE);
}
SX_DEBUG("Buffer pitch: %u", input_buffer_lock.pitch);
uint8_t* pixels = (uint8_t*) input_buffer_lock.bufferDataPtr;
for (int j = 0; j < height; ++j) {
for (int i = 0; i < width; ++i) {
int dx = (j * input_buffer_lock.pitch) + i * 4;
pixels[dx + 0] = 0xFF;
pixels[dx + 1] = 0x00;
pixels[dx + 2] = 0x00;
pixels[dx + 3] = 0xFF;
}
}
/*
This causes a segfault. GDB shows me:
Thread 1 "test-nvenc-v0-d" received signal SIGSEGV, Segmentation fault.
0x00007ffff687d268 in ?? () from /usr/lib/libnvcuvid.so.1
(gdb) bt
#0 0x00007ffff687d268 in ?? () from /usr/lib/libnvcuvid.so.1
#1 0x00005555555588d1 in main (argc=3, argv=0x7fffffffe838) at test-nvenc-v0.cpp:357
*/
result = enc_ctx.nvEncUnlockInputBuffer(encoder, &input_buffer_lock);
if (NV_ENC_SUCCESS != result) {
SX_ERROR("Failed to unload the input buffer.");
exit(EXIT_FAILURE);
}
}
/*
@todo
- free enc_guids
- free profile_guids
- free format guids
- free preset guids
- free cuda context
- free cuda device
- free input_buffers
- free output_buffers
*/
return 0;
}
/* Returns true when `a` and `b` are byte-for-byte identical GUIDs. */
static bool nvenc_guid_compare(const GUID& a, const GUID& b) {
  const int difference = memcmp(&a, &b, sizeof(b));
  return 0 == difference;
}
/*
  Maps a codec, profile or preset GUID to the name of the matching
  constant. The table is searched top to bottom and the first entry
  that compares equal wins; a GUID that matches no entry yields
  "UNKNOWN".
*/
static std::string nvenc_guid_to_string(GUID guid) {

  struct NamedGuid {
    const GUID* value;
    const char* label;
  };

  static const NamedGuid known_guids[] = {
    /* codecs */
    { &NV_ENC_CODEC_H264_GUID, "NV_ENC_CODEC_H264_GUID" },
    { &NV_ENC_CODEC_HEVC_GUID, "NV_ENC_CODEC_HEVC_GUID" },
    /* profiles */
    { &NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID, "NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID" },
    { &NV_ENC_H264_PROFILE_BASELINE_GUID, "NV_ENC_H264_PROFILE_BASELINE_GUID" },
    { &NV_ENC_H264_PROFILE_MAIN_GUID, "NV_ENC_H264_PROFILE_MAIN_GUID" },
    { &NV_ENC_H264_PROFILE_HIGH_GUID, "NV_ENC_H264_PROFILE_HIGH_GUID" },
    { &NV_ENC_H264_PROFILE_HIGH_444_GUID, "NV_ENC_H264_PROFILE_HIGH_444_GUID" },
    { &NV_ENC_H264_PROFILE_STEREO_GUID, "NV_ENC_H264_PROFILE_STEREO_GUID" },
    { &NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY, "NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY" },
    { &NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID, "NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID" },
    { &NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID, "NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID" },
    { &NV_ENC_HEVC_PROFILE_MAIN_GUID, "NV_ENC_HEVC_PROFILE_MAIN_GUID" },
    { &NV_ENC_HEVC_PROFILE_MAIN10_GUID, "NV_ENC_HEVC_PROFILE_MAIN10_GUID" },
    { &NV_ENC_HEVC_PROFILE_FREXT_GUID, "NV_ENC_HEVC_PROFILE_FREXT_GUID" },
    /* presets */
    { &NV_ENC_PRESET_DEFAULT_GUID, "NV_ENC_PRESET_DEFAULT_GUID" },
    { &NV_ENC_PRESET_HP_GUID, "NV_ENC_PRESET_HP_GUID" },
    { &NV_ENC_PRESET_HQ_GUID, "NV_ENC_PRESET_HQ_GUID" },
    { &NV_ENC_PRESET_BD_GUID, "NV_ENC_PRESET_BD_GUID" },
    { &NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID, "NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID" },
    { &NV_ENC_PRESET_LOW_LATENCY_HQ_GUID, "NV_ENC_PRESET_LOW_LATENCY_HQ_GUID" },
    { &NV_ENC_PRESET_LOW_LATENCY_HP_GUID, "NV_ENC_PRESET_LOW_LATENCY_HP_GUID" },
    { &NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID, "NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID" },
    { &NV_ENC_PRESET_LOSSLESS_HP_GUID, "NV_ENC_PRESET_LOSSLESS_HP_GUID" },
  };

  for (const NamedGuid& candidate : known_guids) {
    if (nvenc_guid_compare(guid, *candidate.value)) {
      return candidate.label;
    }
  }

  return "UNKNOWN";
}
/*
  Maps an `NV_ENC_BUFFER_FORMAT_*` value to the name of its constant.
  Values without a case below yield "UNKNOWN".
*/
static std::string nvenc_bufferformat_to_string(uint32_t fmt) {

  const char* label = "UNKNOWN";

  switch (fmt) {
    case NV_ENC_BUFFER_FORMAT_UNDEFINED:    label = "NV_ENC_BUFFER_FORMAT_UNDEFINED";    break;
    case NV_ENC_BUFFER_FORMAT_NV12:         label = "NV_ENC_BUFFER_FORMAT_NV12";         break;
    case NV_ENC_BUFFER_FORMAT_YV12:         label = "NV_ENC_BUFFER_FORMAT_YV12";         break;
    case NV_ENC_BUFFER_FORMAT_IYUV:         label = "NV_ENC_BUFFER_FORMAT_IYUV";         break;
    case NV_ENC_BUFFER_FORMAT_YUV444:       label = "NV_ENC_BUFFER_FORMAT_YUV444";       break;
    case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: label = "NV_ENC_BUFFER_FORMAT_YUV420_10BIT"; break;
    case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: label = "NV_ENC_BUFFER_FORMAT_YUV444_10BIT"; break;
    case NV_ENC_BUFFER_FORMAT_ARGB:         label = "NV_ENC_BUFFER_FORMAT_ARGB";         break;
    case NV_ENC_BUFFER_FORMAT_ARGB10:       label = "NV_ENC_BUFFER_FORMAT_ARGB10";       break;
    case NV_ENC_BUFFER_FORMAT_AYUV:         label = "NV_ENC_BUFFER_FORMAT_AYUV";         break;
    case NV_ENC_BUFFER_FORMAT_ABGR:         label = "NV_ENC_BUFFER_FORMAT_ABGR";         break;
    case NV_ENC_BUFFER_FORMAT_ABGR10:       label = "NV_ENC_BUFFER_FORMAT_ABGR10";       break;
    default:                                                                             break;
  }

  return label;
}
I’m following the “NVENC_VideoEncoder_API_ProgGuide.pdf” from Jan 2018.