Hi, I’m trying to develop a low-latency decoder that uses the NVCUVID API for transcoding purposes. I am trying to understand how to use the API by studying the VideoDecoder OpenGL sample in the CUDA SDK. I develop on Windows XP using the Microsoft compiler (cl), building a Win32 application (not a CLR application), and I have an NVIDIA 8600GT. I’ve installed CUDA 2.2.
In the code below I call a constructor in which I create a CUvideodecoder, but cuvidCreateDecoder() returns the CUresult code 100 (CUDA_ERROR_NO_DEVICE), even though the device seems to be correctly recognized when I call cutilDrvGetMaxGflopsDeviceId() beforehand.
Code of the main.
// Fixed stream parameters; setCUVIDEOFORMATINFO() copies them into a
// CUVIDEOFORMAT as coded_width / coded_height / codec / chroma_format.
#define SRC_WIDTH 352
#define SRC_HEIGHT 288
#define CODEC_TYPE cudaVideoCodec_H264
#define CHROMA_FORMAT cudaVideoChromaFormat_420
using namespace std;
// Application-wide objects. videoDecoder and frameQueue are allocated in
// main(); videoParser is only declared in the code shown here.
VideoDecoder *videoDecoder;
VideoParser *videoParser;
FrameQueue *frameQueue;
// Format description handed to the VideoDecoder constructor; main() zeroes it
// and then fills it through setCUVIDEOFORMATINFO().
CUVIDEOFORMAT videoFormatInfo;
// Decoder creation flags; main() sets this to cudaVideoCreate_Default.
cudaVideoCreateFlags videoCreateFlags;
// CUDA context handle, written by cuGLCtxCreate() in main().
CUcontext ctx=0;
// Video context lock handle, initialized to NULL here. main() passes it to
// CCtxAutoLock and to the VideoDecoder constructor.
CUvideoctxlock ctxLock = NULL;
// CUDA device handle, written by cuDeviceGet() in main().
CUdevice device = 0;
/*
 * Writes the compile-time stream description (CODEC_TYPE, SRC_WIDTH,
 * SRC_HEIGHT, CHROMA_FORMAT) into the given CUVIDEOFORMAT.
 *
 * Only the four fields assigned below are touched; every other field keeps
 * whatever value the caller left in it. `info` is dereferenced without a
 * NULL check. Always returns 1.
 */
int setCUVIDEOFORMATINFO( CUVIDEOFORMAT *info )
{
    CUVIDEOFORMAT &format = *info;

    // Picture layout.
    format.chroma_format = CHROMA_FORMAT;
    format.coded_height = SRC_HEIGHT;
    format.coded_width = SRC_WIDTH;

    // Bitstream type.
    format.codec = CODEC_TYPE;

    return 1;
}
int main(int argc,char **argv)
{
CUVIDSOURCEDATAPACKET packet;
int i_frame = 0;
const char *filename="Carlitos_Way_transcoded00.264";
// Initialize CUDA
cuInit(0);
// Check for a min spec of Compute 1.1 capability before running
if (!cutilDrvCudaCapabilities(1,1)) {
cutilExit(0, NULL);
}
CUdevice cuda_device;
cuda_device = cutilDrvGetMaxGflopsDeviceId();
cutilDrvSafeCallNoSync(cuDeviceGet(&device, cuda_device ));
// Create CUDA Device w/ GL interop
// (use CU_CTX_BLOCKING_SYNC for better CPU synchronization)
cuGLCtxCreate(&ctx, CU_CTX_BLOCKING_SYNC, device);
CCtxAutoLock lck(ctxLock);
videoCreateFlags = cudaVideoCreate_Default;
memset( &videoFormatInfo, 0, sizeof(CUVIDEOFORMAT));
setCUVIDEOFORMATINFO( &videoFormatInfo );
frameQueue = new FrameQueue();
videoDecoder = new VideoDecoder( videoFormatInfo, ctx, videoCreateFlags, ctxLock );
//Other code after the error
}
Code of the VideoDecoder constructor, where I get the error.
/*
 * Constructs a VideoDecoder for the given stream format.
 *
 * rVideoFormat  - stream description; codec, coded_width, coded_height and
 *                 chroma_format are read.
 * rContext      - CUDA context handle; a copy is kept in m_Context.
 * eCreateFlags  - decoder creation flags; kept in m_VideoCreateFlags and
 *                 forwarded as ulCreationFlags.
 * ctx           - video context lock; kept in m_CtxLock and forwarded as
 *                 vidLock.
 *
 * Prints the requested creation mode and the CUresult of
 * cuvidCreateDecoder(), and asserts that the codec is one of
 * MPEG1/MPEG2/VC1/H264, that the chroma format is 4:2:0, and that decoder
 * creation succeeded.
 */
VideoDecoder::VideoDecoder(const CUVIDEOFORMAT & rVideoFormat,
                           CUcontext &rContext,
                           cudaVideoCreateFlags eCreateFlags,
                           CUvideoctxlock &ctx)
    : m_CtxLock(ctx)
{
    // Keep the creation flags and a copy of the CUDA context handle.
    m_VideoCreateFlags = eCreateFlags;
    m_Context = rContext;

    // Report which creation mode was requested.
    printf("> VideoDecoder::cudaVideoCreateFlags = <%d>", (int)eCreateFlags);
    if (cudaVideoCreate_Default == eCreateFlags) {
        printf("Default (VP)\n");
    } else if (cudaVideoCreate_PreferCUDA == eCreateFlags) {
        printf("Use CUDA decoder\n");
    } else if (cudaVideoCreate_PreferDXVA == eCreateFlags) {
        printf("Use DXVA decoder\n");
    } else {
        printf("Unknown value\n");
    }

    // Only these codecs and 4:2:0 chroma are accepted.
    cudaVideoCodec eCodec = rVideoFormat.codec;
    assert(cudaVideoCodec_MPEG1 == eCodec || cudaVideoCodec_MPEG2 == eCodec || cudaVideoCodec_VC1 == eCodec || cudaVideoCodec_H264 == eCodec);
    assert(cudaVideoChromaFormat_420 == rVideoFormat.chroma_format);

    // Start from an all-zero create-info struct and fill it in.
    CUVIDDECODECREATEINFO &info = oVideoDecodeCreateInfo_;
    memset(&info, 0, sizeof(CUVIDDECODECREATEINFO));

    // Input stream description.
    info.CodecType = rVideoFormat.codec;
    info.ChromaFormat = rVideoFormat.chroma_format;
    info.ulWidth = rVideoFormat.coded_width;
    info.ulHeight = rVideoFormat.coded_height;

    // Output description: NV12 surfaces at the source size (no scaling),
    // adaptive deinterlacing.
    info.OutputFormat = cudaVideoSurfaceFormat_NV12;
    info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    info.ulTargetWidth = info.ulWidth;
    info.ulTargetHeight = info.ulHeight;

    // Start from the frame queue capacity and drop surfaces until the total
    // pixel count is at most 16M (24MB at 4:2:0).
    info.ulNumDecodeSurfaces = FrameQueue::cnMaximumSize;
    while (info.ulNumDecodeSurfaces * rVideoFormat.coded_width * rVideoFormat.coded_height > 16*1024*1024) {
        --info.ulNumDecodeSurfaces;
    }

    // At most two surfaces are mapped at the same time.
    info.ulNumOutputSurfaces = 2;

    info.ulCreationFlags = m_VideoCreateFlags;
    info.vidLock = ctx;

    // Create the decoder and report the result code.
    CUresult oResult = cuvidCreateDecoder(&oDecoder_, &info);
    printf("oResult=%d\n",oResult);
    //HERE I'VE THE ERROR
    assert(CUDA_SUCCESS == oResult);
}
I get the error right after the call to cuvidCreateDecoder().
What could be the problem?
Many thanks.
Best regards.