nvEncGetEncodePresetConfig: NV_ENC_ERR_INVALID_DEVICE

I am attempting to create as simple an encoder program as possible, to get it working and build on it later, but I cannot seem to get the encoder to initialize correctly.

The calls to the NVIDIA APIs are as follows:

NvEncodeAPIGetMaxSupportedVersion
NvEncodeAPICreateInstance
cuInit
cuDeviceGet
cuCtxCreate
cuCtxPopCurrent
nvEncOpenEncodeSessionEx
nvEncGetEncodeGUIDCount
nvEncGetEncodeGUIDs
nvEncGetEncodePresetCount
nvEncGetEncodePresetGUIDs
nvEncGetEncodeProfileGUIDCount
nvEncGetEncodeProfileGUIDs
nvEncGetInputFormatCount
nvEncGetInputFormats
nvEncGetEncodePresetConfig

nvEncGetEncodePresetConfig fails with the error NV_ENC_ERR_INVALID_DEVICE, and I am not sure why that is.

Here is the code that sets up the parameters that are passed to nvEncGetEncodePresetConfig:

// Encoder initialization parameters: H.264 codec GUID, 640x480 frames,
// frame rate given as the fraction 60/1.
NV_ENC_INITIALIZE_PARAMS createEncodeParams;
memset(&createEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
createEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
createEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
// NOTE(review): the value stored in presetGUID is named as a *profile* GUID
// (NV_ENC_H264_PROFILE_HIGH_GUID), and it is later passed as the preset
// argument of nvEncGetEncodePresetConfig -- presumably the source of the
// reported error; confirm against the preset GUIDs the encoder enumerates.
createEncodeParams.presetGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
createEncodeParams.encodeWidth = 640;
createEncodeParams.encodeHeight = 480;
createEncodeParams.darWidth = 640;
createEncodeParams.darHeight = 480;
createEncodeParams.frameRateNum = 60;
createEncodeParams.frameRateDen = 1;
// No explicit encode configuration is supplied here.
createEncodeParams.encodeConfig = nullptr;
createEncodeParams.enablePTD = 1;
createEncodeParams.maxEncodeWidth = 640;
createEncodeParams.maxEncodeHeight = 480;

// Output structure for the preset query; both the outer structure and the
// embedded NV_ENC_CONFIG carry their own version field.
NV_ENC_PRESET_CONFIG presetConfig;
memset(&presetConfig, 0, sizeof(NV_ENC_PRESET_CONFIG));
presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
print("nvEncGetEncodePresetConfig");
// This is the call that reports NV_ENC_ERR_INVALID_DEVICE.
nvEncCall(functionList.nvEncGetEncodePresetConfig(encoder, createEncodeParams.encodeGUID, createEncodeParams.presetGUID, &presetConfig));

The functions print and nvEncCall are just helpers; nvEncCall is a giant switch that prints the error name and terminates the program.

As far as I can tell I am doing this right, at least from what I am reading in the docs. NV_ENC_ERR_INVALID_DEVICE should never be returned by nvEncGetEncodePresetConfig according to them as well, which I find quite strange.

The entirety of the code that I am using for this is below; it is one giant block because I am going for “just works” right now.

The thing I do not understand in the docs is what a “floating CUDA context” even is; I tried to look that up and came up empty-handed.

#include <iostream>

#include <Windows.h>

#include <nvEncodeAPI.h>
#include <cuda.h>

// Expands to one complete `case` of a switch statement: when the switched
// value equals `var`, the enumerator's own name (stringized with #var) is
// passed to print() and the switch is left via `break`.
#define ENUM_VALUE_ERROR(var) case var : print( #var ); break;

typedef NVENCSTATUS(NVENCAPI* NvEncodeAPIGetMaxSupportedVersionPointer)(uint32_t*);
typedef NVENCSTATUS(NVENCAPI* NvEncodeAPICreateInstancePointer)(NV_ENCODE_API_FUNCTION_LIST*);

/// Writes `message` to standard output, followed by a newline character.
void print(const char* message) {
	std::ostream& out = std::cout;
	out << message;
	out << "\n";
}

/**
 * Checks the status of an NVENC API call.
 *
 * Returns normally when `result` is NV_ENC_SUCCESS. For any other status the
 * enumerator name is printed and the process exits with code 1.
 *
 * A status that is not one of the enumerators listed below is reported with
 * its numeric value instead of exiting without any diagnostic.
 *
 * @param result Status returned by an NVENC API function.
 */
void nvEncCall(NVENCSTATUS result) {
	if (result == NV_ENC_SUCCESS) {
		return;
	}

	switch (result) {
		ENUM_VALUE_ERROR(NV_ENC_ERR_NO_ENCODE_DEVICE)
		ENUM_VALUE_ERROR(NV_ENC_ERR_UNSUPPORTED_DEVICE)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_ENCODERDEVICE)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_DEVICE)
		ENUM_VALUE_ERROR(NV_ENC_ERR_DEVICE_NOT_EXIST)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_PTR)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_EVENT)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_PARAM)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_CALL)
		ENUM_VALUE_ERROR(NV_ENC_ERR_OUT_OF_MEMORY)
		ENUM_VALUE_ERROR(NV_ENC_ERR_ENCODER_NOT_INITIALIZED)
		ENUM_VALUE_ERROR(NV_ENC_ERR_UNSUPPORTED_PARAM)
		ENUM_VALUE_ERROR(NV_ENC_ERR_LOCK_BUSY)
		ENUM_VALUE_ERROR(NV_ENC_ERR_NOT_ENOUGH_BUFFER)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INVALID_VERSION)
		ENUM_VALUE_ERROR(NV_ENC_ERR_MAP_FAILED)
		ENUM_VALUE_ERROR(NV_ENC_ERR_NEED_MORE_INPUT)
		ENUM_VALUE_ERROR(NV_ENC_ERR_ENCODER_BUSY)
		ENUM_VALUE_ERROR(NV_ENC_ERR_EVENT_NOT_REGISTERD)
		ENUM_VALUE_ERROR(NV_ENC_ERR_GENERIC)
		ENUM_VALUE_ERROR(NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY)
		ENUM_VALUE_ERROR(NV_ENC_ERR_UNIMPLEMENTED)
		ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_REGISTER_FAILED)
		ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_NOT_REGISTERED)
		ENUM_VALUE_ERROR(NV_ENC_ERR_RESOURCE_NOT_MAPPED)
		default:
			// Status value not covered by the list above: still say
			// something before terminating.
			std::cout << "Unknown NVENCSTATUS: " << static_cast<int>(result) << "\n";
			break;
	}

	exit(1);
}

/**
 * Checks the status of a CUDA driver API call.
 *
 * Returns normally when `status` is CUDA_SUCCESS. For any other status the
 * enumerator name is printed and the process exits with code 1.
 *
 * A status that is not one of the enumerators listed below is reported with
 * its numeric value instead of exiting without any diagnostic.
 *
 * @param status Result returned by a CUDA driver API function.
 */
void cudaCall(CUresult status) {
	if (status == CUDA_SUCCESS) {
		return;
	}

	switch (status) {
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_VALUE)
		ENUM_VALUE_ERROR(CUDA_ERROR_OUT_OF_MEMORY)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_INITIALIZED)
		ENUM_VALUE_ERROR(CUDA_ERROR_DEINITIALIZED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_DISABLED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_NOT_INITIALIZED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_ALREADY_STARTED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PROFILER_ALREADY_STOPPED)
		ENUM_VALUE_ERROR(CUDA_ERROR_NO_DEVICE)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_DEVICE)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_IMAGE)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_CONTEXT)
		ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_ALREADY_CURRENT)
		ENUM_VALUE_ERROR(CUDA_ERROR_MAP_FAILED)
		ENUM_VALUE_ERROR(CUDA_ERROR_UNMAP_FAILED)
		ENUM_VALUE_ERROR(CUDA_ERROR_ARRAY_IS_MAPPED)
		ENUM_VALUE_ERROR(CUDA_ERROR_ALREADY_MAPPED)
		ENUM_VALUE_ERROR(CUDA_ERROR_NO_BINARY_FOR_GPU)
		ENUM_VALUE_ERROR(CUDA_ERROR_ALREADY_ACQUIRED)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED_AS_ARRAY)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_MAPPED_AS_POINTER)
		ENUM_VALUE_ERROR(CUDA_ERROR_ECC_UNCORRECTABLE)
		ENUM_VALUE_ERROR(CUDA_ERROR_UNSUPPORTED_LIMIT)
		ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_ALREADY_IN_USE)
		ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_PTX)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT)
		ENUM_VALUE_ERROR(CUDA_ERROR_NVLINK_UNCORRECTABLE)
		ENUM_VALUE_ERROR(CUDA_ERROR_JIT_COMPILER_NOT_FOUND)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_SOURCE)
		ENUM_VALUE_ERROR(CUDA_ERROR_FILE_NOT_FOUND)
		ENUM_VALUE_ERROR(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND)
		ENUM_VALUE_ERROR(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED)
		ENUM_VALUE_ERROR(CUDA_ERROR_OPERATING_SYSTEM)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_HANDLE)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_FOUND)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_READY)
		ENUM_VALUE_ERROR(CUDA_ERROR_ILLEGAL_ADDRESS)
		ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES)
		ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_TIMEOUT)
		ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING)
		ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED)
		ENUM_VALUE_ERROR(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE)
		ENUM_VALUE_ERROR(CUDA_ERROR_CONTEXT_IS_DESTROYED)
		ENUM_VALUE_ERROR(CUDA_ERROR_ASSERT)
		ENUM_VALUE_ERROR(CUDA_ERROR_TOO_MANY_PEERS)
		ENUM_VALUE_ERROR(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED)
		ENUM_VALUE_ERROR(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED)
		ENUM_VALUE_ERROR(CUDA_ERROR_HARDWARE_STACK_ERROR)
		ENUM_VALUE_ERROR(CUDA_ERROR_ILLEGAL_INSTRUCTION)
		ENUM_VALUE_ERROR(CUDA_ERROR_MISALIGNED_ADDRESS)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_ADDRESS_SPACE)
		ENUM_VALUE_ERROR(CUDA_ERROR_INVALID_PC)
		ENUM_VALUE_ERROR(CUDA_ERROR_LAUNCH_FAILED)
		ENUM_VALUE_ERROR(CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_PERMITTED)
		ENUM_VALUE_ERROR(CUDA_ERROR_NOT_SUPPORTED)
		ENUM_VALUE_ERROR(CUDA_ERROR_UNKNOWN)
		default:
			// Result value not covered by the list above: still say
			// something before terminating.
			std::cout << "Unknown CUresult: " << static_cast<int>(status) << "\n";
			break;
	}

	exit(1);
}

/**
 * Prints a GUID to standard output in C initializer form, e.g.
 * `{ 0x01234567, 0x89AB, 0xCDEF, { 0x01, ... } }`, followed by a newline.
 *
 * Every field is converted to `unsigned int` before being handed to printf so
 * that the argument type matches the `%X` conversion exactly (Data1 in
 * particular is an `unsigned long` in the Windows GUID definition).
 *
 * @param guid GUID to print.
 */
void printGUID(GUID guid) {
	printf(
		"{ 0x%08X, 0x%04X, 0x%04X, { 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X, 0x%02X } }\n",
		static_cast<unsigned int>(guid.Data1),
		static_cast<unsigned int>(guid.Data2),
		static_cast<unsigned int>(guid.Data3),
		static_cast<unsigned int>(guid.Data4[0]),
		static_cast<unsigned int>(guid.Data4[1]),
		static_cast<unsigned int>(guid.Data4[2]),
		static_cast<unsigned int>(guid.Data4[3]),
		static_cast<unsigned int>(guid.Data4[4]),
		static_cast<unsigned int>(guid.Data4[5]),
		static_cast<unsigned int>(guid.Data4[6]),
		static_cast<unsigned int>(guid.Data4[7])
	);
}

/**
 * Prints the enumerator name of an NVENC input buffer format.
 *
 * Formats that are not listed below produce no output.
 *
 * @param format Buffer format value to name.
 */
void printEncBufferFormat(NV_ENC_BUFFER_FORMAT format) {
	switch (format) {
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_UNDEFINED)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_NV12)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YV12)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_IYUV)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV444)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV420_10BIT)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_YUV444_10BIT)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ARGB)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ARGB10)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_AYUV)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ABGR)
		ENUM_VALUE_ERROR(NV_ENC_BUFFER_FORMAT_ABGR10)
	}
}

int main(){
	print("LoadLibrary");
#ifdef _WIN64
	HMODULE nvEncodeAPI = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
#else
	HMODULE nvEncodeAPI = LoadLibrary(TEXT("nvEncodeAPI.dll"));
#endif
	if (nvEncodeAPI == INVALID_HANDLE_VALUE) {
		print("Failed to load nvEncodeAPI");
		return 1;
	}

	print("GetProcAddress");
	NvEncodeAPIGetMaxSupportedVersionPointer getMaxSupportedVersion = (NvEncodeAPIGetMaxSupportedVersionPointer)GetProcAddress(nvEncodeAPI, "NvEncodeAPIGetMaxSupportedVersion");
	NvEncodeAPICreateInstancePointer createInstancePointer = (NvEncodeAPICreateInstancePointer)GetProcAddress(nvEncodeAPI, "NvEncodeAPICreateInstance");

	uint32_t version = 0;
	uint32_t currentVersion = (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION;
	print("NvEncodeAPIGetMaxSupportedVersion");
	nvEncCall(getMaxSupportedVersion(&version));
	if (currentVersion > version) {
		print("Current version is too old");
		return 1;
	}

	NV_ENCODE_API_FUNCTION_LIST functionList;
	memset(&functionList, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
	functionList.version = NV_ENCODE_API_FUNCTION_LIST_VER;
	print("NvEncodeAPICreateInstance");
	nvEncCall(createInstancePointer(&functionList));

	print("cuInit");
	cudaCall(cuInit(0));

	CUdevice cudaDevice;
	print("cuDeviceGet");
	cudaCall(cuDeviceGet(&cudaDevice, 0));

	int cudaMajor;
	int cudaMinor;
	print("cuDeviceComputeCapability");
#pragma warning(disable: 4996)
	cudaCall(cuDeviceComputeCapability(&cudaMajor, &cudaMinor, cudaDevice));
#pragma warning(enable: 4996)

	if ((cudaMajor << 4) + cudaMinor < 0x30) {
		print("No NVEC support");
		return 1;
	}

	CUcontext cudaContext;
	print("cuCtxCreate");
	cudaCall(cuCtxCreate(&cudaContext, 0, cudaDevice));

	print("cuCtxPopCurrent");
	cudaCall(cuCtxPopCurrent(nullptr));

	NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS nvEncParams;
	memset(&nvEncParams, 0, sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS));
	nvEncParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
	nvEncParams.device = cudaContext;
	nvEncParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
	nvEncParams.apiVersion = NVENCAPI_VERSION;
	void* encoder;
	print("nvEncOpenEncodeSessionEx");
	nvEncCall(functionList.nvEncOpenEncodeSessionEx(&nvEncParams, &encoder));

	uint32_t encoderGUIDCount;
	print("nvEncGetEncodeGUIDCount");
	nvEncCall(functionList.nvEncGetEncodeGUIDCount(encoder, &encoderGUIDCount));

	print("HeapAlloc");
	GUID* encoderGUIDs = static_cast<GUID*>(HeapAlloc(
		GetProcessHeap(),
		HEAP_ZERO_MEMORY,
		encoderGUIDCount * sizeof(GUID)
	));
	if (!encoderGUIDs) {
		print("Failed");
		return 1;
	}

	print("nvEncGetEncodeGUIDs");
	nvEncCall(functionList.nvEncGetEncodeGUIDs(encoder, encoderGUIDs, encoderGUIDCount, &encoderGUIDCount));

	for (uint32_t i = 0; i < encoderGUIDCount; i++) {
		printGUID(encoderGUIDs[i]);
	}

	print("HeapFree");
	HeapFree(
		GetProcessHeap(),
		0,
		static_cast<void*>(encoderGUIDs)
	);

	uint32_t presetGUIDCount;
	print("nvEncGetEncodePresetCount");
	nvEncCall(functionList.nvEncGetEncodePresetCount(encoder, NV_ENC_CODEC_H264_GUID, &presetGUIDCount));

	print("HeapAlloc");
	GUID* presetGUIDs = static_cast<GUID*>(HeapAlloc(
		GetProcessHeap(),
		HEAP_ZERO_MEMORY,
		presetGUIDCount * sizeof(GUID)
	));
	if (!presetGUIDs) {
		print("Failed");
		return 1;
	}

	print("nvEncGetEncodePresetGUIDs");
	nvEncCall(functionList.nvEncGetEncodePresetGUIDs(encoder, NV_ENC_CODEC_H264_GUID, presetGUIDs, presetGUIDCount, &presetGUIDCount));

	for (uint32_t i = 0; i < presetGUIDCount; i++) {
		printGUID(presetGUIDs[i]);
	}

	print("HeapFree");
	HeapFree(
		GetProcessHeap(),
		0,
		static_cast<void*>(presetGUIDs)
	);

	uint32_t encodeProfileGUIDCount;
	print("nvEncGetEncodeProfileGUIDCount");
	nvEncCall(functionList.nvEncGetEncodeProfileGUIDCount(encoder, NV_ENC_CODEC_H264_GUID, &encodeProfileGUIDCount));

	print("HeapAlloc");
	GUID* encodeProfileGUIDs = static_cast<GUID*>(HeapAlloc(
		GetProcessHeap(),
		HEAP_ZERO_MEMORY,
		encodeProfileGUIDCount * sizeof(GUID)
	));
	if (!encodeProfileGUIDs) {
		print("Failed");
		return 1;
	}

	print("nvEncGetEncodeProfileGUIDs");
	nvEncCall(functionList.nvEncGetEncodeProfileGUIDs(encoder, NV_ENC_CODEC_H264_GUID, encodeProfileGUIDs, encodeProfileGUIDCount, &encodeProfileGUIDCount));

	for (uint32_t i = 0; i < encodeProfileGUIDCount; i++) {
		printGUID(encodeProfileGUIDs[i]);
	}

	print("HeapFree");
	HeapFree(
		GetProcessHeap(),
		0,
		static_cast<void*>(encodeProfileGUIDs)
	);

	uint32_t encodeInputFormatCount;
	print("nvEncGetInputFormatCount");
	nvEncCall(functionList.nvEncGetInputFormatCount(encoder, NV_ENC_CODEC_H264_GUID, &encodeInputFormatCount));

	print("HeapAlloc");
	NV_ENC_BUFFER_FORMAT* encodeInputFormats = static_cast<NV_ENC_BUFFER_FORMAT*>(HeapAlloc(
		GetProcessHeap(),
		HEAP_ZERO_MEMORY,
		encodeInputFormatCount * sizeof(NV_ENC_BUFFER_FORMAT)
	));
	if (!encodeInputFormats) {
		print("Failed");
		return 1;
	}

	print("nvEncGetInputFormats");
	nvEncCall(functionList.nvEncGetInputFormats(encoder, NV_ENC_CODEC_H264_GUID, encodeInputFormats, encodeInputFormatCount, &encodeInputFormatCount));

	for (uint32_t i = 0; i < encodeProfileGUIDCount; i++) {
		printEncBufferFormat(encodeInputFormats[i]);
	}

	print("HeapFree");
	HeapFree(
		GetProcessHeap(),
		0,
		static_cast<void*>(encodeInputFormats)
	);

	NV_ENC_INITIALIZE_PARAMS createEncodeParams;
	memset(&createEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
	createEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
	createEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
	createEncodeParams.presetGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
	createEncodeParams.encodeWidth = 640;
	createEncodeParams.encodeHeight = 480;
	createEncodeParams.darWidth = 640;
	createEncodeParams.darHeight = 480;
	createEncodeParams.frameRateNum = 60;
	createEncodeParams.frameRateDen = 1;
	createEncodeParams.encodeConfig = nullptr;
	createEncodeParams.enablePTD = 1;
	createEncodeParams.maxEncodeWidth = 640;
	createEncodeParams.maxEncodeHeight = 480;

	NV_ENC_PRESET_CONFIG presetConfig;
	memset(&presetConfig, 0, sizeof(NV_ENC_PRESET_CONFIG));
	presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
	presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
	print("nvEncGetEncodePresetConfig");
	nvEncCall(functionList.nvEncGetEncodePresetConfig(encoder, createEncodeParams.encodeGUID, createEncodeParams.presetGUID, &presetConfig));

	print("nvEncInitializeEncoder");
	nvEncCall(functionList.nvEncInitializeEncoder(encoder, &createEncodeParams));

	print("nvEncDestroyEncoder");
	nvEncCall(functionList.nvEncDestroyEncoder(encoder));

	print("cuCtxDestroy");
	cudaCall(cuCtxDestroy(cudaContext));

	print("FreeLibrary");
	FreeLibrary(nvEncodeAPI);
	return 0;
}
createEncodeParams.presetGUID = NV_ENC_H264_PROFILE_HIGH_GUID;

You are using a profile where you should be using a preset GUID. Took me ages to solve the same issue.

Hi,
I am having the same problem. But in my case
const GUID guidPreset = NV_ENC_PRESET_DEFAULT_GUID; // OK
// const GUID guidPreset = NV_ENC_PRESET_P3_GUID; Error 4
I’m using api 11.0.10.
Even the sample is using NV_ENC_PRESET_P3_GUID . Did I miss some settings?

Solved: I was calling nvEncGetEncodePresetConfig with NV_ENC_PRESET_P3_GUID, which is incorrect. For the NV_ENC_PRESET_Px presets, nvEncGetEncodePresetConfigEx should be used instead.

In the SDK sample, nvEncGetEncodePresetConfig is called first without checking for errors, and afterwards nvEncGetEncodePresetConfigEx is called as well. The first call is unnecessary and incorrect, and probably misleading. I suggest refactoring that part, or did I understand it incorrectly? @rarzumanyan

I left a comment on GitLab regarding this part, on commit 84155d04af40dbc259099639be8b96577e851b32.