Cuda error: no CUDA-capable device is detected

Hello everyone,

I am trying to use the GPU and FFmpeg to decode H264 frames from cameras into JPG images and display them in the UI of my application. The decoder class looks like the attachment below, but the application always returns the error "no CUDA-capable device is detected". I used “C:\ProgramData\NVIDIA Corporation\CUDA Samples\v11.0\bin\win64\Debug>deviceQuery.exe” to check the device's CUDA capability, and it returned

Detected 1 CUDA Capable device(s)

Device 0: "GeForce GTX 1060 3GB"

Could you please help me solve the issues? Thank you so much!

Error

[h264_cuvid @ 00000000285e4a80] ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo) failed -> CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
[h264_cuvid @ 00000000285e4a80] cuvid decode callback error
[h264_cuvid @ 00000000285e4a80] ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams) failed -> CUDA_ERROR_INVALID_HANDLE: invalid resource handle
[h264_cuvid @ 00000000285e4a80] cuvid decode callback error
[h264_cuvid @ 00000000285e4a80] ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams) failed -> CUDA_ERROR_INVALID_HANDLE: invalid resource handle
[h264_cuvid @ 00000000285e4a80] cuvid decode callback error
[h264_cuvid @ 00000000285e4a80] ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams) failed -> CUDA_ERROR_INVALID_HANDLE: invalid resource handle

Header file

#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <plog/Log.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/pixdesc.h>
#include <libavutil/hwcontext.h>
#include <libavutil/opt.h>
#include <libavutil/avassert.h>
#include <libavutil/imgutils.h>
#include <libavutil/avutil.h>
#include <libswscale/swscale.h>
}

#include "DataStruct.h"

/**
 * Hardware-accelerated H264 decoder built on FFmpeg's h264_cuvid decoder.
 *
 * Usage: construct once (opens the codec and creates the HW device context),
 * then call decodeFrame() per encoded frame. Output is an RGBA image scaled
 * to CAMERA_OUT_PUT_WIDTH x CAMERA_OUT_PUT_HEIGHT.
 *
 * NOTE(review): not thread-safe — all members are reused across calls.
 */
class HwH264Decoder {
public:
	HwH264Decoder();
	~HwH264Decoder();
	// Decodes one raw H264 access unit into an RGBA frame.
	// Ownership: allocates rgbaFrame->data with malloc(); caller must free it.
	// Returns false on decode/convert failure or when no frame is ready yet.
	bool decodeFrame(RawH264Frame* rawH264Frame, RGBAFrame* rgbaFrame); 
private:
	int ret;                               // last FFmpeg return code
	enum AVHWDeviceType type;              // hw device type (cuda)
	AVCodec *decoder = NULL;               // h264_cuvid decoder
	AVCodecContext *decoder_ctx = NULL;    // codec context bound to the hw device
	AVPacket packet;                       // reused input packet (data not owned)
	AVFrame *frame = NULL, *sw_frame = NULL;   // decoded hw frame / CPU copy
	AVFrame *tmp_frame = NULL;             // alias: whichever of the two holds CPU pixels
	AVFrame *destinationRGBAFrame;         // wraps bufferImage for sws_scale output
	unsigned char *bufferImage;            // RGBA output buffer (malloc'd in ctor)
	int dataSize;                          // size of bufferImage in bytes

	// Maps a hw device type to the pixel format its frames carry.
	enum AVPixelFormat findFmtByHwType(const enum AVHWDeviceType type);
	// Creates the hw device context and attaches it to ctx. Returns <0 on error.
	int hw_decoder_init(AVCodecContext *ctx, const enum AVHWDeviceType type);
};

CPP file

/*
h264 decoder c++ file
*/

#include <iostream>
#include "HwH264Decoder.h"

static enum AVPixelFormat hw_pix_fmt;
static AVBufferRef *hw_device_ctx = NULL;

static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
	// FFmpeg calls this during avcodec_open2() with the list of formats the
	// decoder can output; pick the hardware format negotiated at init time.
	for (const enum AVPixelFormat *fmt = pix_fmts; *fmt != AV_PIX_FMT_NONE; ++fmt) {
		if (*fmt == hw_pix_fmt) {
			return *fmt;
		}
	}

	fprintf(stderr, "Failed to get HW surface format.\n");
	return AV_PIX_FMT_NONE;
}

HwH264Decoder::HwH264Decoder()
{
	LOG_INFO << "init HwH264Decoder";

	// Candidate backends: cuda, dxva2, qsv, d3d11va.
	type = av_hwdevice_find_type_by_name("cuda");
	if (type == AV_HWDEVICE_TYPE_NONE) {
		LOG_ERROR << "Device type cuda is not supported.";
		LOG_INFO << "Available device types:";
		while ((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE)
			LOG_INFO << av_hwdevice_get_type_name(type);
		// BUG FIX: original fell through and kept initializing with an
		// invalid device type; bail out instead.
		return;
	}

	decoder = avcodec_find_decoder_by_name("h264_cuvid");
	//decoder = avcodec_find_decoder(AV_CODEC_ID_H264);
	if (!decoder) {
		// BUG FIX: a NULL decoder would crash avcodec_get_hw_config below.
		LOG_ERROR << "Decoder h264_cuvid not found.";
		return;
	}

	//== initialize the decoder context ==--
	decoder_ctx = avcodec_alloc_context3(decoder);
	if (!decoder_ctx) {
		LOG_ERROR << "Could not allocate video codec context";
		return;
	}

	// Probe the decoder's hw configs for one matching our device type.
	// BUG FIX: hw_pix_fmt is a static with zero-init, which is
	// AV_PIX_FMT_YUV420P, not AV_PIX_FMT_NONE — set the sentinel explicitly.
	hw_pix_fmt = AV_PIX_FMT_NONE;
	for (int i = 0;; i++) {
		const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
		if (!config) {
			// BUG FIX: original logged raw "%s" placeholders through the
			// stream logger and then DEREFERENCED the NULL config.
			LOG_ERROR << "Decoder " << decoder->name
				<< " does not support device type "
				<< av_hwdevice_get_type_name(type);
			break;
		}
		if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
			config->device_type == type) {
			hw_pix_fmt = config->pix_fmt;
			break;
		}
	}

	if (hw_pix_fmt == AV_PIX_FMT_NONE) {
		LOG_ERROR << "Cannot support cuda";
		return;
	}

	decoder_ctx->get_format = get_hw_format;

	if (hw_decoder_init(decoder_ctx, type) < 0) {
		LOG_ERROR << "Failed to init hw decoder";
		return;
	}

	if ((ret = avcodec_open2(decoder_ctx, decoder, nullptr)) < 0) {
		LOG_ERROR << "Failed to open codec";
		return;
	}

	av_init_packet(&packet);

	// Reusable RGBA output frame + buffer (4 bytes per pixel).
	destinationRGBAFrame = av_frame_alloc();
	dataSize = CAMERA_OUT_PUT_WIDTH * CAMERA_OUT_PUT_HEIGHT * 4;
	bufferImage = (unsigned char *)malloc(dataSize);
	if (!bufferImage || !destinationRGBAFrame) {
		LOG_ERROR << "Failed to allocate output buffers";
		return;
	}

	LOG_INFO << "Done Init";
}

HwH264Decoder::~HwH264Decoder() {
	// Release all FFmpeg objects; the av_*_free helpers tolerate NULL
	// pointers, so unconditional calls are safe here.
	av_frame_free(&destinationRGBAFrame);
	av_frame_free(&sw_frame);
	av_frame_free(&frame);

	avcodec_free_context(&decoder_ctx);
	av_buffer_unref(&hw_device_ctx);

	free(bufferImage);
}

enum AVPixelFormat HwH264Decoder::findFmtByHwType(const enum AVHWDeviceType type)
{
	// Translate a hardware device type into the pixel format its decoded
	// frames carry; AV_PIX_FMT_NONE for anything unrecognized.
	switch (type) {
	case AV_HWDEVICE_TYPE_VAAPI:
		return AV_PIX_FMT_VAAPI;
	case AV_HWDEVICE_TYPE_DXVA2:
		return AV_PIX_FMT_DXVA2_VLD;
	case AV_HWDEVICE_TYPE_D3D11VA:
		return AV_PIX_FMT_D3D11;
	case AV_HWDEVICE_TYPE_VDPAU:
		return AV_PIX_FMT_VDPAU;
	case AV_HWDEVICE_TYPE_VIDEOTOOLBOX:
		return AV_PIX_FMT_VIDEOTOOLBOX;
	case AV_HWDEVICE_TYPE_CUDA:
		return AV_PIX_FMT_CUDA;
	default:
		return AV_PIX_FMT_NONE;
	}
}

/**
 * Creates the hardware device context for `type` and attaches a reference
 * to it on the codec context. Returns 0 on success, a negative AVERROR
 * code on failure.
 */
int HwH264Decoder::hw_decoder_init(AVCodecContext *ctx, const enum AVHWDeviceType type)
{
	int err = 0;

	if ((err = av_hwdevice_ctx_create(&hw_device_ctx, type,
		NULL, NULL, 0)) < 0) {
		LOG_ERROR << "Failed to create specified HW device.";
		return err;
	}
	// BUG FIX: av_buffer_ref() can return NULL on allocation failure;
	// the original left ctx->hw_device_ctx NULL silently.
	ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
	if (!ctx->hw_device_ctx) {
		LOG_ERROR << "Failed to reference HW device context.";
		return AVERROR(ENOMEM);
	}

	return err;
}

/**
 * Decodes one H264 access unit and converts it to an RGBA image of
 * CAMERA_OUT_PUT_WIDTH x CAMERA_OUT_PUT_HEIGHT.
 *
 * Ownership: on success, outFrame->data is malloc'd here and must be
 * freed by the caller. Returns false when no frame is ready yet (EAGAIN)
 * or on any decode/convert error.
 */
bool HwH264Decoder::decodeFrame(RawH264Frame* rawFrame, RGBAFrame* outFrame) {
	if (rawFrame == NULL || outFrame == NULL) {
		return false;
	}

	// Drop the frames from the previous call; fresh ones are allocated below.
	av_frame_free(&frame);
	av_frame_free(&sw_frame);

	// The packet borrows the caller's buffer; it is not owned or freed here.
	packet.data = (uint8_t *)rawFrame->data;
	packet.size = rawFrame->dataSize;

	ret = avcodec_send_packet(decoder_ctx, &packet);
	if (ret < 0) {
		LOG_ERROR << "Error during decoding";
		return false;
	}

	if (!(frame = av_frame_alloc()) || !(sw_frame = av_frame_alloc())) {
		LOG_ERROR << "Can not alloc frame";
		return false;
	}

	ret = avcodec_receive_frame(decoder_ctx, frame);
	if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
		// Decoder needs more input (or is drained) — not an error.
		return false;
	}
	else if (ret < 0) {
		LOG_ERROR << "Error while decoding";
		return false;
	}

	if (frame->format == hw_pix_fmt) {
		/* retrieve data from GPU to CPU */
		if ((ret = av_hwframe_transfer_data(sw_frame, frame, 0)) < 0) {
			LOG_ERROR << "Error transferring the data to system memory";
			return false;
		}
		tmp_frame = sw_frame;
	}
	else
		tmp_frame = frame;

	if (!tmp_frame->data[0]) {
		LOG_ERROR << "frame has no allocated buffer assigned.";
		return false;
	}

	// BUG FIX: the source format must be the CPU frame's own format
	// (e.g. NV12 after av_hwframe_transfer_data). The original passed
	// decoder_ctx->pix_fmt, which for a hw decoder is the hardware format
	// (AV_PIX_FMT_CUDA) that swscale cannot consume.
	SwsContext *colorConversionContext = sws_getCachedContext(NULL,
		tmp_frame->width, tmp_frame->height, (enum AVPixelFormat)tmp_frame->format,
		CAMERA_OUT_PUT_WIDTH, CAMERA_OUT_PUT_HEIGHT,
		AV_PIX_FMT_RGBA, SWS_BILINEAR, 0, 0, 0);
	if (!colorConversionContext) {
		LOG_ERROR << "unable to create color conversion context.";
		return false;
	}

	// BUG FIX: destinationRGBAFrame was never allocated before use;
	// allocate it lazily so avpicture_fill's replacement has a target.
	if (!destinationRGBAFrame && !(destinationRGBAFrame = av_frame_alloc())) {
		LOG_ERROR << "Can not alloc destination frame";
		sws_freeContext(colorConversionContext);
		return false;
	}

	//== hook our allocated image destination buffer into the AVFrame ==--
	// (av_image_fill_arrays replaces the deprecated avpicture_fill)
	av_image_fill_arrays(destinationRGBAFrame->data, destinationRGBAFrame->linesize,
		bufferImage, AV_PIX_FMT_RGBA, CAMERA_OUT_PUT_WIDTH, CAMERA_OUT_PUT_HEIGHT, 1);

	//== perform color conversion YUV->RGBA into our image buffer ==--
	sws_scale(colorConversionContext,
		(uint8_t const * const *)tmp_frame->data,
		tmp_frame->linesize,
		0,
		tmp_frame->height,
		destinationRGBAFrame->data,
		destinationRGBAFrame->linesize
	);

	sws_freeContext(colorConversionContext);

	if (!destinationRGBAFrame->data[0]) {
		LOG_ERROR << "unable to convert to rgba.";
		return false;
	}

	// Hand a malloc'd copy to the caller (caller owns and frees it).
	outFrame->dataSize = dataSize;
	outFrame->data = (unsigned char *)malloc(outFrame->dataSize);
	if (!outFrame->data) {
		LOG_ERROR << "unable to allocate output buffer.";
		return false;
	}
	memcpy(outFrame->data, destinationRGBAFrame->data[0], outFrame->dataSize);
	outFrame->frameNumber = rawFrame->frameNumber;
	outFrame->timestamp = rawFrame->timestamp;
	outFrame->deviceTimestamp = rawFrame->deviceTimestamp;

	return true;
}