I am attempting to create as simple an encoder program as possible, to get it working and build on it later, but I cannot seem to get the encoder to initialize correctly.
The calls to NVIDIA APIs are as follows:
NvEncodeAPIGetMaxSupportedVersion
NvEncodeAPICreateInstance
cuInit
cuDeviceGet
cuCtxCreate
cuCtxPopCurrent
nvEncOpenEncodeSessionEx
nvEncGetEncodeGUIDCount
nvEncGetEncodeGUIDs
nvEncGetEncodePresetCount
nvEncGetEncodePresetGUIDs
nvEncGetEncodeProfileGUIDCount
nvEncGetEncodeProfileGUIDs
nvEncGetInputFormatCount
nvEncGetInputFormats
nvEncGetEncodePresetConfig
nvEncGetEncodePresetConfig fails with the error NV_ENC_ERR_INVALID_DEVICE, and I am not sure why that is.
Here is the code that sets up the parameters that are passed to nvEncGetEncodePresetConfig:
// Excerpt: zero-fills the encoder initialization parameters, fills in codec, resolution
// and frame rate, then asks the encoder for the configuration of the chosen preset.
NV_ENC_INITIALIZE_PARAMS createEncodeParams;
memset(&createEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
createEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
createEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
// NOTE(review): the value assigned here is named as a *profile* GUID, while the field
// (and the nvEncGetEncodePresetConfig call below) takes a *preset* GUID. Presumably this
// should be one of the GUIDs returned by nvEncGetEncodePresetGUIDs; confirm against nvEncodeAPI.h.
createEncodeParams.presetGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
createEncodeParams.encodeWidth = 640;
createEncodeParams.encodeHeight = 480;
createEncodeParams.darWidth = 640;
createEncodeParams.darHeight = 480;
createEncodeParams.frameRateNum = 60;
createEncodeParams.frameRateDen = 1;
createEncodeParams.encodeConfig = nullptr;
createEncodeParams.enablePTD = 1;
createEncodeParams.maxEncodeWidth = 640;
createEncodeParams.maxEncodeHeight = 480;
// The preset query writes into presetConfig; both the outer struct and the nested
// presetCfg carry their own version field, and both are set before the call.
NV_ENC_PRESET_CONFIG presetConfig;
memset(&presetConfig, 0, sizeof(NV_ENC_PRESET_CONFIG));
presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
print("nvEncGetEncodePresetConfig");
nvEncCall(functionList.nvEncGetEncodePresetConfig(encoder, createEncodeParams.encodeGUID, createEncodeParams.presetGUID, &presetConfig));
The functions print and nvEncCall are just helpers; nvEncCall is simply a giant switch that prints the error name and terminates the program.
As far as I can tell I am doing this right, at least from what I am reading in the docs. NV_ENC_ERR_INVALID_DEVICE should never be returned by nvEncGetEncodePresetConfig according to them as well, which I find quite strange.
Below is the entirety of the code that I am using for this; it is one giant block because I am going for “just works” right now.
The thing I do not understand in the docs is what a “floating CUDA context” even is; I tried to look that up and came up empty-handed.
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>
#include <Windows.h>
#include <nvEncodeAPI.h>
#include <cuda.h>
// Expands to a `case` label for `var` that prints the enumerator's own name
// (stringified with #var) and then breaks out of the enclosing switch.
#define ENUM_VALUE_ERROR(var) case var : print( #var ); break;
// Function-pointer types for the two entry points that main() resolves from the
// NVENC library with GetProcAddress ("NvEncodeAPIGetMaxSupportedVersion" and
// "NvEncodeAPICreateInstance").
typedef NVENCSTATUS(NVENCAPI* NvEncodeAPIGetMaxSupportedVersionPointer)(uint32_t*);
typedef NVENCSTATUS(NVENCAPI* NvEncodeAPICreateInstancePointer)(NV_ENCODE_API_FUNCTION_LIST*);
// Writes `message` to standard output and terminates the line with '\n'.
// The stream is not flushed explicitly.
void print(const char* message) {
    std::cout << message;
    std::cout << '\n';
}
// Checks the status returned by an NVENC API call.
//
// On NV_ENC_SUCCESS this returns immediately. For any other status it prints the
// name of the status code (or its numeric value when the code is not one of the
// enumerators listed below) and terminates the process with exit code 1.
//
// result: status value returned by an NVENC entry point.
void nvEncCall(NVENCSTATUS result) {
    if (result == NV_ENC_SUCCESS) {
        return;
    }
    switch (result) {
        ENUM_VALUE_ERROR(NV_ENC_ERR_NO_ENCODE_DEVICE);
        ENUM_VALUE_ERROR(NV_ENC_ERR_UNSUPPORTED_DEVICE);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_ENCODERDEVICE);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_DEVICE);
        ENUM_VALUE_ERROR(NV_ENC_ERR_DEVICE_NOT_EXIST);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_PTR);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_EVENT);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_PARAM);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_CALL);
        ENUM_VALUE_ERROR(NV_ENC_ERR_OUT_OF_MEMORY);
        ENUM_VALUE_ERROR(NV_ENC_ERR_ENCODER_NOT_INITIALIZED);
        ENUM_VALUE_ERROR(NV_ENC_ERR_UNSUPPORTED_PARAM);
        ENUM_VALUE_ERROR(NV_ENC_ERR_LOCK_BUSY);
        ENUM_VALUE_ERROR(NV_ENC_ERR_NOT_ENOUGH_BUFFER);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_VERSION);
        ENUM_VALUE_ERROR(NV_ENC_ERR_MAP_FAILED);
        ENUM_VALUE_ERROR(NV_ENC_ERR_NEED_MORE_INPUT);
        ENUM_VALUE_ERROR(NV_ENC_ERR_ENCODER_BUSY);
        ENUM_VALUE_ERROR(NV_ENC_ERR_EVENT_NOT_REGISTERD);
        ENUM_VALUE_ERROR(NV_ENC_ERR_GENERIC);
        ENUM_VALUE_ERROR(NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY);
        ENUM_VALUE_ERROR(NV_ENC_ERR_UNIMPLEMENTED);
        ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_REGISTER_FAILED);
        ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_NOT_REGISTERED);
        ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_NOT_MAPPED);
    default:
        // A status that is not listed above (for example one introduced by a newer
        // header or driver) must still leave a trace before the process exits.
        std::cout << "Unknown NVENCSTATUS: " << static_cast<int>(result) << "\n";
        break;
    }
    exit(1);
}
// Checks the status returned by a CUDA driver API call.
//
// On CUDA_SUCCESS this returns immediately. For any other status it prints the
// name of the status code (or its numeric value when the code is not one of the
// enumerators listed below) and terminates the process with exit code 1.
//
// status: result value returned by a cu* driver API function.
void cudaCall(CUresult status) {
    if (status == CUDA_SUCCESS) {
        return;
    }
    switch (status) {
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_VALUE);
        ENUM_VALUE_ERROR(CUDA_ERROR_OUT_OF_MEMORY);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_INITIALIZED);
        ENUM_VALUE_ERROR(CUDA_ERROR_DEINITIALIZED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_DISABLED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_NOT_INITIALIZED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_ALREADY_STARTED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_ALREADY_STOPPED);
        ENUM_VALUE_ERROR(CUDA_ERROR_NO_DEVICE);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_DEVICE);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_IMAGE);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_CONTEXT);
        ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_ALREADY_CURRENT);
        ENUM_VALUE_ERROR(CUDA_ERROR_MAP_FAILED);
        ENUM_VALUE_ERROR(CUDA_ERROR_UNMAP_FAILED);
        ENUM_VALUE_ERROR(CUDA_ERROR_ARRAY_IS_MAPPED);
        ENUM_VALUE_ERROR(CUDA_ERROR_ALREADY_MAPPED);
        ENUM_VALUE_ERROR(CUDA_ERROR_NO_BINARY_FOR_GPU);
        ENUM_VALUE_ERROR(CUDA_ERROR_ALREADY_ACQUIRED);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED_AS_ARRAY);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED_AS_POINTER);
        ENUM_VALUE_ERROR(CUDA_ERROR_ECC_UNCORRECTABLE);
        ENUM_VALUE_ERROR(CUDA_ERROR_UNSUPPORTED_LIMIT);
        ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_ALREADY_IN_USE);
        ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_PTX);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT);
        ENUM_VALUE_ERROR(CUDA_ERROR_NVLINK_UNCORRECTABLE);
        ENUM_VALUE_ERROR(CUDA_ERROR_JIT_COMPILER_NOT_FOUND);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_SOURCE);
        ENUM_VALUE_ERROR(CUDA_ERROR_FILE_NOT_FOUND);
        ENUM_VALUE_ERROR(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND);
        ENUM_VALUE_ERROR(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED);
        ENUM_VALUE_ERROR(CUDA_ERROR_OPERATING_SYSTEM);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_HANDLE);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_FOUND);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_READY);
        ENUM_VALUE_ERROR(CUDA_ERROR_ILLEGAL_ADDRESS);
        ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES);
        ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_TIMEOUT);
        ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING);
        ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED);
        ENUM_VALUE_ERROR(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE);
        ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_IS_DESTROYED);
        ENUM_VALUE_ERROR(CUDA_ERROR_ASSERT);
        ENUM_VALUE_ERROR(CUDA_ERROR_TOO_MANY_PEERS);
        ENUM_VALUE_ERROR(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED);
        ENUM_VALUE_ERROR(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED);
        ENUM_VALUE_ERROR(CUDA_ERROR_HARDWARE_STACK_ERROR);
        ENUM_VALUE_ERROR(CUDA_ERROR_ILLEGAL_INSTRUCTION);
        ENUM_VALUE_ERROR(CUDA_ERROR_MISALIGNED_ADDRESS);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_ADDRESS_SPACE);
        ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_PC);
        ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_FAILED);
        ENUM_VALUE_ERROR(CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_PERMITTED);
        ENUM_VALUE_ERROR(CUDA_ERROR_NOT_SUPPORTED);
        ENUM_VALUE_ERROR(CUDA_ERROR_UNKNOWN);
    default:
        // A result code that is not listed above must still leave a trace before
        // the process exits, otherwise the failure is silent.
        std::cout << "Unknown CUresult: " << static_cast<int>(status) << "\n";
        break;
    }
    exit(1);
}
// Prints a GUID to standard output as a C-style initializer, e.g.
// "{ 0x01234567, 0x89AB, 0xCDEF, { 0x01, ... } }", followed by a newline.
//
// guid: the GUID to print (passed by value).
void printGUID(GUID guid) {
    // Every argument is cast to unsigned int so that it matches the %X conversion
    // exactly; Data1 in particular is not declared as unsigned int in the Windows
    // GUID definition, which makes the uncast call a format/argument mismatch.
    printf(
        "{ 0x%08X, 0x%04X, 0x%04X, { 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X } }\n",
        static_cast<unsigned int>(guid.Data1),
        static_cast<unsigned int>(guid.Data2),
        static_cast<unsigned int>(guid.Data3),
        static_cast<unsigned int>(guid.Data4[0]),
        static_cast<unsigned int>(guid.Data4[1]),
        static_cast<unsigned int>(guid.Data4[2]),
        static_cast<unsigned int>(guid.Data4[3]),
        static_cast<unsigned int>(guid.Data4[4]),
        static_cast<unsigned int>(guid.Data4[5]),
        static_cast<unsigned int>(guid.Data4[6]),
        static_cast<unsigned int>(guid.Data4[7])
    );
}
// Prints the enumerator name of an NVENC input buffer format on its own line.
// A value that is not one of the enumerators listed below is printed as a
// number instead of being dropped silently.
//
// format: buffer format value, e.g. as returned by nvEncGetInputFormats.
void printEncBufferFormat(NV_ENC_BUFFER_FORMAT format) {
    switch (format) {
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_UNDEFINED);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_NV12);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YV12);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_IYUV);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV444);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV420_10BIT);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV444_10BIT);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ARGB);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ARGB10);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_AYUV);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ABGR);
        ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ABGR10);
    default:
        // Keep one output line per format even when the value is not listed here,
        // so the printed list always has as many entries as the encoder reported.
        std::cout << "Unknown NV_ENC_BUFFER_FORMAT: " << static_cast<unsigned int>(format) << "\n";
        break;
    }
}
int main(){
print("LoadLibrary");
#ifdef _WIN64
HMODULE nvEncodeAPI = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
#else
HMODULE nvEncodeAPI = LoadLibrary(TEXT("nvEncodeAPI.dll"));
#endif
if (nvEncodeAPI == INVALID_HANDLE_VALUE) {
print("Failed to load nvEncodeAPI");
return 1;
}
print("GetProcAddress");
NvEncodeAPIGetMaxSupportedVersionPointer getMaxSupportedVersion = (NvEncodeAPIGetMaxSupportedVersionPointer)GetProcAddress(nvEncodeAPI, "NvEncodeAPIGetMaxSupportedVersion");
NvEncodeAPICreateInstancePointer createInstancePointer = (NvEncodeAPICreateInstancePointer)GetProcAddress(nvEncodeAPI, "NvEncodeAPICreateInstance");
uint32_t version = 0;
uint32_t currentVersion = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
print("NvEncodeAPIGetMaxSupportedVersion");
nvEncCall(getMaxSupportedVersion(&version));
if (currentVersion > version) {
print("Current version is too old");
return 1;
}
NV_ENCODE_API_FUNCTION_LIST functionList;
memset(&functionList, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
functionList.version = NV_ENCODE_API_FUNCTION_LIST_VER;
print("NvEncodeAPICreateInstance");
nvEncCall(createInstancePointer(&functionList));
print("cuInit");
cudaCall(cuInit(0));
CUdevice cudaDevice;
print("cuDeviceGet");
cudaCall(cuDeviceGet(&cudaDevice, 0));
int cudaMajor;
int cudaMinor;
print("cuDeviceComputeCapability");
#pragma warning(disable: 4996)
cudaCall(cuDeviceComputeCapability(&cudaMajor, &cudaMinor, cudaDevice));
#pragma warning(enable: 4996)
if ((cudaMajor << 4) + cudaMinor < 0x30) {
print("No NVEC support");
return 1;
}
CUcontext cudaContext;
print("cuCtxCreate");
cudaCall(cuCtxCreate(&cudaContext, 0, cudaDevice));
print("cuCtxPopCurrent");
cudaCall(cuCtxPopCurrent(nullptr));
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS nvEncParams;
memset(&nvEncParams, 0, sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS));
nvEncParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
nvEncParams.device = cudaContext;
nvEncParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
nvEncParams.apiVersion = NVENCAPI_VERSION;
void* encoder;
print("nvEncOpenEncodeSessionEx");
nvEncCall(functionList.nvEncOpenEncodeSessionEx(&nvEncParams, &encoder));
uint32_t encoderGUIDCount;
print("nvEncGetEncodeGUIDCount");
nvEncCall(functionList.nvEncGetEncodeGUIDCount(encoder, &encoderGUIDCount));
print("HeapAlloc");
GUID* encoderGUIDs = static_cast<GUID*>(HeapAlloc(
GetProcessHeap(),
HEAP_ZERO_MEMORY,
encoderGUIDCount * sizeof(GUID)
));
if (!encoderGUIDs) {
print("Failed");
return 1;
}
print("nvEncGetEncodeGUIDs");
nvEncCall(functionList.nvEncGetEncodeGUIDs(encoder, encoderGUIDs, encoderGUIDCount, &encoderGUIDCount));
for (uint32_t i = 0; i < encoderGUIDCount; i++) {
printGUID(encoderGUIDs[i]);
}
print("HeapFree");
HeapFree(
GetProcessHeap(),
0,
static_cast<void*>(encoderGUIDs)
);
uint32_t presetGUIDCount;
print("nvEncGetEncodePresetCount");
nvEncCall(functionList.nvEncGetEncodePresetCount(encoder, NV_ENC_CODEC_H264_GUID, &presetGUIDCount));
print("HeapAlloc");
GUID* presetGUIDs = static_cast<GUID*>(HeapAlloc(
GetProcessHeap(),
HEAP_ZERO_MEMORY,
presetGUIDCount * sizeof(GUID)
));
if (!presetGUIDs) {
print("Failed");
return 1;
}
print("nvEncGetEncodePresetGUIDs");
nvEncCall(functionList.nvEncGetEncodePresetGUIDs(encoder, NV_ENC_CODEC_H264_GUID, presetGUIDs, presetGUIDCount, &presetGUIDCount));
for (uint32_t i = 0; i < presetGUIDCount; i++) {
printGUID(presetGUIDs[i]);
}
print("HeapFree");
HeapFree(
GetProcessHeap(),
0,
static_cast<void*>(presetGUIDs)
);
uint32_t encodeProfileGUIDCount;
print("nvEncGetEncodeProfileGUIDCount");
nvEncCall(functionList.nvEncGetEncodeProfileGUIDCount(encoder, NV_ENC_CODEC_H264_GUID, &encodeProfileGUIDCount));
print("HeapAlloc");
GUID* encodeProfileGUIDs = static_cast<GUID*>(HeapAlloc(
GetProcessHeap(),
HEAP_ZERO_MEMORY,
encodeProfileGUIDCount * sizeof(GUID)
));
if (!encodeProfileGUIDs) {
print("Failed");
return 1;
}
print("nvEncGetEncodeProfileGUIDs");
nvEncCall(functionList.nvEncGetEncodeProfileGUIDs(encoder, NV_ENC_CODEC_H264_GUID, encodeProfileGUIDs, encodeProfileGUIDCount, &encodeProfileGUIDCount));
for (uint32_t i = 0; i < encodeProfileGUIDCount; i++) {
printGUID(encodeProfileGUIDs[i]);
}
print("HeapFree");
HeapFree(
GetProcessHeap(),
0,
static_cast<void*>(encodeProfileGUIDs)
);
uint32_t encodeInputFormatCount;
print("nvEncGetInputFormatCount");
nvEncCall(functionList.nvEncGetInputFormatCount(encoder, NV_ENC_CODEC_H264_GUID, &encodeInputFormatCount));
print("HeapAlloc");
NV_ENC_BUFFER_FORMAT* encodeInputFormats = static_cast<NV_ENC_BUFFER_FORMAT*>(HeapAlloc(
GetProcessHeap(),
HEAP_ZERO_MEMORY,
encodeInputFormatCount * sizeof(NV_ENC_BUFFER_FORMAT)
));
if (!encodeInputFormats) {
print("Failed");
return 1;
}
print("nvEncGetInputFormats");
nvEncCall(functionList.nvEncGetInputFormats(encoder, NV_ENC_CODEC_H264_GUID, encodeInputFormats, encodeInputFormatCount, &encodeInputFormatCount));
for (uint32_t i = 0; i < encodeProfileGUIDCount; i++) {
printEncBufferFormat(encodeInputFormats[i]);
}
print("HeapFree");
HeapFree(
GetProcessHeap(),
0,
static_cast<void*>(encodeInputFormats)
);
NV_ENC_INITIALIZE_PARAMS createEncodeParams;
memset(&createEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
createEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
createEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
createEncodeParams.presetGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
createEncodeParams.encodeWidth = 640;
createEncodeParams.encodeHeight = 480;
createEncodeParams.darWidth = 640;
createEncodeParams.darHeight = 480;
createEncodeParams.frameRateNum = 60;
createEncodeParams.frameRateDen = 1;
createEncodeParams.encodeConfig = nullptr;
createEncodeParams.enablePTD = 1;
createEncodeParams.maxEncodeWidth = 640;
createEncodeParams.maxEncodeHeight = 480;
NV_ENC_PRESET_CONFIG presetConfig;
memset(&presetConfig, 0, sizeof(NV_ENC_PRESET_CONFIG));
presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
print("nvEncGetEncodePresetConfig");
nvEncCall(functionList.nvEncGetEncodePresetConfig(encoder, createEncodeParams.encodeGUID, createEncodeParams.presetGUID, &presetConfig));
print("nvEncInitializeEncoder");
nvEncCall(functionList.nvEncInitializeEncoder(encoder, &createEncodeParams));
print("nvEncDestroyEncoder");
nvEncCall(functionList.nvEncDestroyEncoder(encoder));
print("cuCtxDestroy");
cudaCall(cuCtxDestroy(cudaContext));
print("FreeLibrary");
FreeLibrary(nvEncodeAPI);
return 0;
}