Decoder SDK - how to access decoded frame?

I'm trying to use the basic 5-method Decoder API (I want to write my own parser and source, which is actually what your documentation recommends).
I believe I understand how to create the decoder and how to deliver the compressed H264 frames directly to DecodePicture(). However, it is not at all clear how and when to obtain the decoded NV12 frames. The documentation is a bit sparse, and the sample code relies on a somewhat magical callback from your parser. I see where the callback is passed to the parser but can't figure out how this connects to the actual Decoder API. What am I missing?

Maybe this will help. If you need the code to inject NALUs, let me know.

//---------------------------------------------------------------------------
// main.cpp
//
// cuviddec decode sample frontend
//
// Copyright (c) 2008 NVIDIA Corporation. All rights reserved.
//---------------------------------------------------------------------------

#include <stdio.h>
#include <string.h>
#include <d3d9.h>
#include "nvcuvid.h"
#include "nalu.h"

#define USE_NALUS 0
#define USE_ASYNC_COPY  0
#define MAX_FRM_CNT     20
#define DISPLAY_DELAY   2   // Attempt to decode up to 2 frames ahead of display

typedef struct
{
    CUvideoparser cuParser;
    CUvideodecoder cuDecoder;
    CUstream cuStream;
    CUVIDDECODECREATEINFO dci;
    CUVIDPARSERDISPINFO DisplayQueue[DISPLAY_DELAY];
    unsigned char *pRawNV12;
    int raw_nv12_size;
    int pic_cnt;
    int display_pos;
    FILE *fd_yuv;
} DecodeSession;
DecodeSession State;

static int DisplayPicture(DecodeSession *state, CUVIDPARSERDISPINFO *pPicParams);

#if USE_NALUS
__int64 startpos;
extern FILE *bits;
extern int IsFirstByteStreamNALU;
extern void InitializeBuffer(void);
extern NALU_t *nalu;
extern int GetAnnexbNALU (NALU_t *nalu, __int64 *start_code_pos);
#endif

//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Parser callbacks
//

// Called when the decoder encounters a video format change (or initial sequence header)
static int CUDAAPI HandleVideoSequence(void *pvUserData, CUVIDEOFORMAT *pFormat)
{
    DecodeSession *state = (DecodeSession *)pvUserData;
    
    if ((pFormat->codec != state->dci.CodecType)
     || (pFormat->coded_width != state->dci.ulWidth)
     || (pFormat->coded_height != state->dci.ulHeight)
     || (pFormat->chroma_format != state->dci.ChromaFormat))
    {
        if (state->cuDecoder)
        {
            cuvidDestroyDecoder(state->cuDecoder);
            state->cuDecoder = NULL;
        }
        memset(&state->dci, 0, sizeof(CUVIDDECODECREATEINFO));
        state->dci.ulWidth = pFormat->coded_width;
        state->dci.ulHeight = pFormat->coded_height;
        state->dci.ulNumDecodeSurfaces = MAX_FRM_CNT;
        state->dci.CodecType = pFormat->codec;
        state->dci.ChromaFormat = pFormat->chroma_format;
        // Output (pass through)
        state->dci.OutputFormat = cudaVideoSurfaceFormat_NV12;
        state->dci.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; // No deinterlacing
        state->dci.ulTargetWidth = state->dci.ulWidth;
        state->dci.ulTargetHeight = state->dci.ulHeight;
        state->dci.ulNumOutputSurfaces = 1;
        // Create the decoder
        if (CUDA_SUCCESS != cuvidCreateDecoder(&state->cuDecoder, &state->dci))
        {
            printf("Failed to create video decoder\n");
            return 0;
        }
    }
    return 1;
}

// Called by the video parser to decode a single picture
// Since the parser will deliver data as fast as it can, we need to make sure that the picture
// index we're attempting to use for decode is no longer used for display
static int CUDAAPI HandlePictureDecode(void *pvUserData, CUVIDPICPARAMS *pPicParams)
{
    DecodeSession *state = (DecodeSession *)pvUserData;
    CUresult result;
    int flush_pos;

    if (pPicParams->CurrPicIdx < 0) // Should never happen
    {
        printf("Invalid picture index\n");
        return 0;
    }
    // Make sure that the new frame we're decoding into is not still in the display queue
    // (this could happen if we do not have enough free frame buffers to handle the max delay)
    flush_pos = state->display_pos; // oldest frame
    for (;;)
    {
        bool frame_in_use = false;
        for (int i=0; i<DISPLAY_DELAY; i++)
        {
            if (state->DisplayQueue[i].picture_index == pPicParams->CurrPicIdx)
            {
                frame_in_use = true;
                break;
            }
        }
        if (!frame_in_use)
        {
            // No problem: we're safe to use this frame
            break;
        }
        // The target frame is still pending in the display queue:
        // Flush the oldest entry from the display queue and repeat
        if (state->DisplayQueue[flush_pos].picture_index >= 0)
        {
            DisplayPicture(state, &state->DisplayQueue[flush_pos]);
            state->DisplayQueue[flush_pos].picture_index = -1;
        }
        flush_pos = (flush_pos + 1) % DISPLAY_DELAY;
    }
    result = cuvidDecodePicture(state->cuDecoder, pPicParams);
    if (result != CUDA_SUCCESS)
    {
        printf("cuvidDecodePicture: %d\n", result);
    }
    return (result == CUDA_SUCCESS);
}

// Called by the video parser to display a video frame (in the case of field pictures, there may be
// 2 decode calls per 1 display call, since two fields make up one frame)
static int CUDAAPI HandlePictureDisplay(void *pvUserData, CUVIDPARSERDISPINFO *pPicParams)
{
    DecodeSession *state = (DecodeSession *)pvUserData;
    
    if (state->DisplayQueue[state->display_pos].picture_index >= 0)
    {
        DisplayPicture(state, &state->DisplayQueue[state->display_pos]);
        state->DisplayQueue[state->display_pos].picture_index = -1;
    }
    state->DisplayQueue[state->display_pos] = *pPicParams;
    state->display_pos = (state->display_pos + 1) % DISPLAY_DELAY;
    return TRUE;
}

static int DisplayPicture(DecodeSession *state, CUVIDPARSERDISPINFO *pPicParams)
{
    CUVIDPROCPARAMS vpp;
    CUdeviceptr devPtr;
    CUresult result;
    unsigned int pitch = 0, w, h;
    int nv12_size;
                
    memset(&vpp, 0, sizeof(vpp));
    vpp.progressive_frame = pPicParams->progressive_frame;
    vpp.top_field_first = pPicParams->top_field_first;
    result = cuvidMapVideoFrame(state->cuDecoder, pPicParams->picture_index, &devPtr, &pitch, &vpp);
    if (result != CUDA_SUCCESS)
    {
        printf("cuvidMapVideoFrame: %d\n", result);
        return 0;
    }
    w = state->dci.ulTargetWidth;
    h = state->dci.ulTargetHeight;
    nv12_size = pitch * (h + h/2);  // 12bpp
    if ((!state->pRawNV12) || (nv12_size > state->raw_nv12_size))
    {
        state->raw_nv12_size = 0;
        if (state->pRawNV12)
        {
            cuMemFreeHost(state->pRawNV12);    // Just to be safe (the pitch should be constant)
            state->pRawNV12 = NULL;
        }
        result = cuMemAllocHost((void**)&state->pRawNV12, nv12_size);
        if (result != CUDA_SUCCESS)
            printf("cuMemAllocHost failed to allocate %d bytes (%d)\n", nv12_size, result);
        state->raw_nv12_size = nv12_size;
    }
    if (state->pRawNV12)
    {
    #if USE_ASYNC_COPY
        result = cuMemcpyDtoHAsync(state->pRawNV12, devPtr, nv12_size, state->cuStream);
        if (result != CUDA_SUCCESS)
            printf("cuMemcpyDtoHAsync: %d\n", result);
        // Gracefully wait for async copy to complete
        while (CUDA_ERROR_NOT_READY == cuStreamQuery(state->cuStream))
        {
            Sleep(1);
        }
    #else
        result = cuMemcpyDtoH(state->pRawNV12, devPtr, nv12_size);
    #endif
    }
    cuvidUnmapVideoFrame(state->cuDecoder, devPtr);
    // Convert the output to standard IYUV and dump it to disk (note: very inefficient)
    if ((state->fd_yuv) && (state->pRawNV12))
    {
        unsigned int y;
        const unsigned char *p = state->pRawNV12;
        unsigned char *iyuv = new unsigned char [w*h+w*(h>>1)];
        
        // Copy luma
        for (y=0; y<h; y++)
        {
            memcpy(iyuv+y*w, p+y*pitch, w);
        }
        // De-interleave chroma (NV12 stored as U,V,U,V,...)
        p += h*pitch;
        for (y=0; y<h/2; y++)
        {
            for (unsigned int x=0; x<w/2; x++)
            {
                iyuv[w*h+y*w/2+x] = p[y*pitch+x*2];
                iyuv[w*h+(h/2)*(w/2)+y*w/2+x] = p[y*pitch+x*2+1];
            }
        }
        fwrite(iyuv, 1, w*h+w*(h/2), state->fd_yuv);
        delete [] iyuv;
    }
    state->pic_cnt++;
    return 1;
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// CUDA 2.0 initialization
//

IDirect3D9 *g_pD3D = NULL;
IDirect3DDevice9 *g_pD3Dev = NULL;
CUcontext g_cuContext = NULL;
CUdevice g_cuDevice = NULL;

static bool InitCuda()
{
    CUresult err;
    int i, lAdapter, lAdapterCount;

    err = cuInit(0);
    if (err != CUDA_SUCCESS)
    {
        printf("ERROR: cuInit failed (%d)\n", err);
        return false;
    }
    cuDeviceGetCount(&lAdapterCount);
    for (lAdapter = 0; lAdapter < lAdapterCount; lAdapter++)
    {
        err = cuDeviceGet(&g_cuDevice, lAdapter);
        if (err != CUDA_SUCCESS)
            continue;
        err = cuCtxCreate(&g_cuContext, 0, g_cuDevice);
        if (err == CUDA_SUCCESS)
        {
            // Init display queue
            for (i=0; i<DISPLAY_DELAY; i++)
            {
                State.DisplayQueue[i].picture_index = -1;   // invalid
            }
            State.display_pos = 0;

            return true;
        }
    }
    printf("ERROR: Failed to create CUDA context (%d)\n", err);
    return false;
}

static bool DeinitCuda()
{
    if (g_cuContext)
    {
        CUresult err = cuCtxDestroy(g_cuContext);
        if (err != CUDA_SUCCESS)
            printf("WARNING: cuCtxDestroy failed (%d)\n", err);
        g_cuContext = NULL;
    }
    return true;
}

int main(int argc, char *argv[])
{
#if USE_NALUS
#else
    unsigned char io_buffer[16*1024];
#endif
    CUVIDPARSERPARAMS parserInitParams;
    CUstream cuStream = NULL;
    CUresult result;
    char *arg_input = NULL;
    char *arg_output = NULL;
    FILE *fInput = NULL;
    int retval = 1;
    int i, elapsed_time;

    if (argc < 2)
    {
        fprintf(stderr, "cuvid input.264 [output.yuv]\n");
        return retval;
    }

    for (i=1; i<argc; i++)
    {
        if (!arg_input)
            arg_input = argv[i];
        else if (!arg_output)
            arg_output = argv[i];
        else
        {
            fprintf(stderr, "invalid parameter\n");
            return retval;
        }
    }

    if (!arg_input)
    {
        fprintf(stderr, "input file not specified\n");
        return retval;
    }
    memset(&State, 0, sizeof(State));
    timeBeginPeriod(1);
    
    // Initialize CUDA 2.0 with D3D9 interoperability
    if (!InitCuda())
    {
        printf("Failed to initialize CUDA 2.0\n");
        goto exit;
    }
    // Create video parser
    memset(&parserInitParams, 0, sizeof(parserInitParams));
    parserInitParams.CodecType = cudaVideoCodec_H264;
    parserInitParams.ulMaxNumDecodeSurfaces = MAX_FRM_CNT;
//  parserInitParams.ulErrorThreshold = 100;
    parserInitParams.ulMaxDisplayDelay = 4;
    parserInitParams.ulClockRate = 1000000000;
    parserInitParams.pUserData = &State;
    parserInitParams.pfnSequenceCallback = HandleVideoSequence;
    parserInitParams.pfnDecodePicture = HandlePictureDecode;
    parserInitParams.pfnDisplayPicture = HandlePictureDisplay;
    result = cuvidCreateVideoParser(&State.cuParser, &parserInitParams);
    if (result != CUDA_SUCCESS)
    {
        printf("Failed to create video parser (%d)\n", result);
        goto exit;
    }
    result = cuStreamCreate(&State.cuStream, 0);
    if (result != CUDA_SUCCESS)
    {
        printf("cuStreamCreate failed (%d)\n", result);
        goto exit;
    }
#if USE_NALUS
    fopen_s(&bits, arg_input, "rb");
    if (bits == NULL)
    {
        printf("Failed to open \"%s\"\n", arg_input);
        goto exit;
    }
    IsFirstByteStreamNALU = 1;
    _fseeki64(bits, 0, SEEK_SET);
    InitializeBuffer();
#else
    // Open input file
    fopen_s(&fInput, arg_input, "rb");
    if (fInput == NULL)
    {
        printf("Failed to open \"%s\"\n", arg_input);
        goto exit;
    }
#endif
    // Open output file
    if (arg_output)
    {
        fopen_s(&State.fd_yuv, arg_output, "wb");
        if (State.fd_yuv == NULL)
        {
            printf("Failed to create \"%s\"\n", arg_output);
            goto exit;
        }
    }
    // Start decoding
    elapsed_time = timeGetTime();
    for (;;)
    {
        CUVIDSOURCEDATAPACKET pkt;
#if USE_NALUS
        int len = GetAnnexbNALU(nalu, &startpos);
#else
        int len = (int) fread(io_buffer, 1, sizeof(io_buffer), fInput);
#endif
        
        if (len <= 0)
        {
            // Flush the decoder
            pkt.flags = CUVID_PKT_ENDOFSTREAM;
            pkt.payload_size = 0;
            pkt.payload = NULL;
            pkt.timestamp = 0;
            cuvidParseVideoData(State.cuParser, &pkt);
            break;
        }
        pkt.flags = 0;
#if USE_NALUS
        pkt.payload_size = nalu->len;
        pkt.payload = nalu->buf;
#else
        pkt.payload_size = len;
        pkt.payload = io_buffer;
#endif
        pkt.timestamp = 0;  // not using timestamps
        if (cuvidParseVideoData(State.cuParser, &pkt) != CUDA_SUCCESS)
        {
            // Error.
        }
    }
    // Flush display queue
    for (i=0; i<DISPLAY_DELAY; i++)
    {
        if (State.DisplayQueue[State.display_pos].picture_index >= 0)
        {
            DisplayPicture(&State, &State.DisplayQueue[State.display_pos]);
            State.DisplayQueue[State.display_pos].picture_index = -1;
        }
        State.display_pos = (State.display_pos + 1) % DISPLAY_DELAY;
    }
    elapsed_time = timeGetTime() - elapsed_time;
    retval = 0;
    printf("Processed %d frames in %dms (%5.2ffps)\n",
        State.pic_cnt, elapsed_time, ((float)State.pic_cnt*1000.0/(float)elapsed_time));
exit:
    if (State.fd_yuv)
    {
        fclose(State.fd_yuv);
        State.fd_yuv = NULL;
    }
    // Delete all created objects
    if (State.cuParser != NULL)
    {
        cuvidDestroyVideoParser(State.cuParser);
        State.cuParser = NULL;
    }
    if (State.cuDecoder != NULL)
    {
        cuvidDestroyDecoder(State.cuDecoder);
        State.cuDecoder = NULL;
    }
    if (State.cuStream != NULL)
    {
        cuStreamDestroy(State.cuStream);
        State.cuStream = NULL;
    }
    if (State.pRawNV12)
    {
        cuMemFreeHost(State.pRawNV12);
        State.pRawNV12 = NULL;
    }
    DeinitCuda();
    timeEndPeriod(1);
    return retval;
}

Hi,
I tested this code to decode an H264 stream frame by frame, but I don't understand why HandlePictureDecode is not called when cuvidParseVideoData is called. The call always returns CUDA_SUCCESS, yet nothing seems to be decoded.
Could you please give details or an explanation?

I am using CUDA 8.0 on Ubuntu.

regards

The provided code works fine so you must have changed things to go “frame-by-frame”. Please provide your code for inspection. It’s not possible to guess what you did and where you may have gone wrong.

It seems that CUVID thinks you have not injected a full frame and is waiting for more data.

The code I gave sends chunks of data into the parser. It’s possible that more than one chunk is needed for a frame, so you would see multiple parser calls before you get a callback.

Electrodynamics, Thanks for that code!
For some reason I didn’t get the email until today indicating that someone had replied to my post. I am happy to use the standard parser object (although I wish it was actually documented!) but was having trouble replacing the ‘stream’ object used to read the file in the original NVIDIA sample code which seemed a bit tightly coupled. Was also having trouble following the data flow.
In my app the frames may be arriving from a file, an H264 camera via RTSP, a memory buffer, or a custom network protocol used in our app. So I need to be able to easily replace the source stream logic.
Also the output frames might go to the screen or to a memory buffer for some analytics.
Your code appears to show me exactly how to do both of those things.
I’m working on another project at the moment but will try to get back to this soon and take a more detailed look at your code.

Ideally I want to create a DirectShow decoder like I did with the Intel Decoder SDK (I know… DirectShow technology is getting a bit old but I’m stuck with it for the moment…)

Thanks again!

Also sure wish I could cut and paste the code out of your post without picking up the 468 line numbers at the beginning of each line… :-/

Hi,
Indeed, maybe the buffer is not complete when passed to the decoder function, but I bypassed the NALU functions; I didn't see any information about that package.

The code doesn't crash, but it doesn't decode.

My code:

// Parser callbacks (same as in the original sample)
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// CUDA 2.0 initialization
//
CUcontext g_cuContext = NULL;
CUdevice g_cuDevice = NULL;

static bool InitCuda()
{
    // ... (body as in the original sample)
}

static bool DeinitCuda()
{
    // ... (body as in the original sample)
}

int main(int argc, char *argv[])
{
    unsigned char io_buffer[16*1024];
    CUVIDPARSERPARAMS parserInitParams;
    CUstream cuStream = NULL;
    CUresult result;
    char *arg_input = NULL;
    char *arg_output = NULL;
    FILE *fInput = NULL;
    int retval = 1;
    int i, elapsed_time;
    int index = 0;


    printf("CUDA 0\n");
    // Create video parser
    memset(&parserInitParams, 0, sizeof(parserInitParams));
    parserInitParams.CodecType = cudaVideoCodec_H264;
    parserInitParams.ulMaxNumDecodeSurfaces = MAX_FRM_CNT;
    parserInitParams.ulMaxDisplayDelay = 4;
    parserInitParams.ulClockRate = 1000000000;
    parserInitParams.pUserData = &State;
    parserInitParams.pfnSequenceCallback = HandleVideoSequence;
    parserInitParams.pfnDecodePicture = HandlePictureDecode2;
    parserInitParams.pfnDisplayPicture = HandlePictureDisplay2;
    result = cuvidCreateVideoParser(&State.cuParser, &parserInitParams);
    printf("CUDA 1\n");

    // Open input file
    fopen_s(&fInput, arg_input, "rb");
    if (fInput == NULL)
    {
        printf("Failed to open \"%s\"\n", arg_input);
        goto exit;
    }
    // Open output file
    if (arg_output)
    {
        fopen_s(&State.fd_yuv, arg_output, "wb");
        if (State.fd_yuv == NULL)
        {
            printf("Failed to create \"%s\"\n", arg_output);
            goto exit;
        }
    }
// Start decoding
//elapsed_time = timeGetTime();

    for (;;)
    {
        index++;
        printf("Index %d\n", index);
        CUVIDSOURCEDATAPACKET pkt;
        int len = (int) fread(io_buffer, 1, sizeof(io_buffer), fInput);
        
        if (len <= 0)
        {
            // Flush the decoder
            pkt.flags = CUVID_PKT_ENDOFSTREAM;
            pkt.payload_size = 0;
            pkt.payload = NULL;
            pkt.timestamp = 0;
            cuvidParseVideoData(State.cuParser, &pkt);
            break;
        }
        pkt.flags = 0;
        pkt.payload_size = len;
        pkt.payload = io_buffer;
        pkt.timestamp = 0;  // not using timestamps
        if (cuvidParseVideoData(State.cuParser, &pkt) != CUDA_SUCCESS)
        {
            volatile int g = 0;
            printf(" Error %d \n", g);
        }
    }
    printf("Number %d\n", State.DisplayQueue[State.display_pos].picture_index);

    // Flush display queue
    for (i=0; i<DISPLAY_DELAY; i++)
    {
        if (State.DisplayQueue[State.display_pos].picture_index >= 0)
        {
            printf(" Display 1\n");
            DisplayPicture(&State, &State.DisplayQueue[State.display_pos]);
            State.DisplayQueue[State.display_pos].picture_index = -1;
        }
        State.display_pos = (State.display_pos + 1) % DISPLAY_DELAY;
    }
    elapsed_time = 0 ; //timeGetTime() - elapsed_time;
    retval = 0;
    printf("Processed %d frames in %dms (%5.2ffps)\n",
        State.pic_cnt, elapsed_time, ((float)State.pic_cnt*1000.0/(float)elapsed_time));

exit:
    return retval;
}

You wrote:

“Indeed maybe buffer is not full to pass to decoder function, but i am huge to bypass NALU function, i didn’t see any information about this package.”

I don’t know what you mean by “huge to bypass”. That sounds like you don’t want to inject data by NALUs. If, however, you do then as I said in my first reply I can give you the code to do that.

If you just have an arbitrary data source, make a thread that just keeps injecting the data into the parser as it arrives and use the display callback to pace things.

You have posted your code without explanation or any questions. Is it not working or it is working? Did you have a question about it?

Hello,
Sorry for the lack of explanations.

This code is not working. I didn't find much information about NALU and the file nalu.h, so I bypassed it.

But I don't understand why the HandlePictureDecode2 callback is not called during cuvidParseVideoData; when I add a printf I don't see it during execution. My theory is that the API's own HandlePictureDecode is called instead of the one in my file. But in that case I don't see whether the frame is decoded, so maybe you can tell me how to check this.

I am aware that decoding may need several packets before the decode function is called, but I don't see any call to the previous functions during the entire parsing of the video file.

As for NALU, I don't really see the point. Do you have some explanation? And code?

My aim is to use the decode function frame by frame so I can transmit the decompressed frames over a local network.

Could you give some advice?

Thanks

For details:

    // Init decoding

    parserInitParams.CodecType = cudaVideoCodec_H264;
    parserInitParams.ulMaxNumDecodeSurfaces = MAX_FRM_CNT;
    parserInitParams.ulMaxDisplayDelay = 4;
    parserInitParams.ulClockRate = 1000000000;
    parserInitParams.pUserData = &State;
    parserInitParams.pfnSequenceCallback = HandleVideoSequence2;
    parserInitParams.pfnDecodePicture = HandlePictureDecode2;
    parserInitParams.pfnDisplayPicture = HandlePictureDisplay2;
    result = cuvidCreateVideoParser(&State.cuParser, &parserInitParams);

    // Parse file
    int len = (int) fread(io_buffer, 1, sizeof(io_buffer), fInput);

    printf("Len %d\n", len);
    if (len <= 0)
        {
            // Flush the decoder
            pkt.flags = CUVID_PKT_ENDOFSTREAM;
            pkt.payload_size = 0;
            pkt.payload = NULL;
            pkt.timestamp = 0;
            cuvidParseVideoData(State.cuParser, &pkt);
            break;
        }
        pkt.flags = 0;

        pkt.payload_size = len;
        pkt.payload = io_buffer;

        pkt.timestamp = 0;  // not using timestamps
    printf("Parse video data \n");

        // Call cuda parser and decoder

        if (cuvidParseVideoData(State.cuParser, &pkt) != CUDA_SUCCESS)
        {
            volatile int g = 0;
            printf(" Error %d \n", g);
        }


    printf("Number %d\n", State.DisplayQueue[State.display_pos].picture_index);
    // Here it always shows that the queue entry is empty, but the parse call never returns an error.

Your init sets the pfn to HandleVideoSequence2, but the code you posted has HandleVideoSequence(). So you are not giving complete, consistent code, which makes it hard to help you.

First check that you get a callback to HandleVideoSequence(). You will not get decode callbacks without it. If you do not successfully execute this first callback, then your stream may be ill-formed. There must be valid SPS and PPS in there at the start of the stream.

The API does not provide HandlePictureDecode(). That is only supplied by the user. So your theory is not the cause of the problem.

If you do not need per-NALU processing then you can ignore it. I needed it in my application because I am indexing the stream and so I have to parse on a per-NALU basis.

Sorry, I'll try to give you more elements:

I renamed 3 functions and added a trace at the beginning of the new ones:
HandleVideoSequence -> HandleVideoSequence2
HandlePictureDecode -> HandlePictureDecode2
HandlePictureDisplay -> HandlePictureDisplay2

I fill the structure with the definitions of the 3 functions before the call to cuvidParseVideoData.

Normally the first function should create a decoder object, and then HandlePictureDecode should be called.

But as far as I can tell, the code you posted, and maybe the redefinition of these 3 functions, makes no difference, since I don't see any call to your 3 functions.

You can check: there is a printf at the beginning of each function.

My problem is that in my case the queue is always empty and decoding doesn't work.

As I said, if you do not get the sequence callback then your stream data being passed to the parser must be ill-formed (assuming you have init’ed everything correctly). For example, CUVID expects Annex B NALU-based elementary streams and you must have valid SPS/PPS for AVC video at the start. Are these conditions satisfied? What kind of file are you opening? This code works only with elementary streams. If you have a container you’ll have to demux it.

You do not need to rename anything.

Re,

I finally resolved the problem. Indeed, cuvidParseVideoData does a pre-check on the bitstream before calling HandleVideoSequence and the other callbacks.

In my case, the input was not correctly formatted, and the pre-check prevented the callbacks from running.

Thanks for your answers.

Ben

Good news! I’m glad you are making progress. Post again if further problems arise.

Finally got around to trying to build your code. (Although I’ll be away all next week). I have Decoder SDK 7.1.9 for Windows. Having what are probably some dumb build problems.

  1. I don't have nvcuvid.h, only dynlink_nvcuvid.h. I also don't have nalu.h.
  2. The cuInit() signature does not match up: you pass 1 parameter, my libraries want 3.
  3. There seems to be a conflict in your code related to USE_NALUS: #ifdef vs #if. You have it defined to 0, which still counts as defined. The result (at least in VS2013) is that io_buffer is never defined, but later you fread() into the undefined io_buffer.
  1. nvcuvid.h is in the CUDA toolkit include directory. If you want to revise the code to use the Video SDK includes, then go for it. Regarding nalu.h, I explained that if you needed it, I would be happy to provide it.

  2. I don’t know what you mean by “my libraries”. See here:

http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__INITIALIZE.html#axzz4igCgDbac

  3. The only effect of that typo is that an array is declared that need not be declared. If you were correct, it would not compile and, believe me, I would have noticed that. Nevertheless, I have corrected the posted code. Thank you for pointing it out.
  1. Sorry, I should have preceded all that with 'this is probably a dumb question, but...'. I'm new to this particular toolkit. For example, I didn't realize there was a difference between the CUDA toolkit and the Decoder SDK. I'm working from the sample Decoder SDK programs provided by NVIDIA, which seem to have added a 'helper' layer to the API in the form of dynlink_nvcuvid.h. I probably screwed up my install or my paths.
  • Yes, I would appreciate a copy of nalu.h.

  2. By 'my libraries' I just meant my copy of the NVIDIA Decoder SDK and sample code as installed (apparently ineptly) by me.

  3. I was just saying that I actually do get a compiler error for an undefined variable (io_buffer) when building the unmodified code. I believe your code worked for you; it's an interesting mystery as to why I get an error from the compiler on that same code. I was not trying to be critical or whiny.

P.S. I actually do appreciate you taking time to help someone who is a total NVIDIA noob. I will try to do my homework and only ask intelligent, reasonable questions in the future after making an honest effort to figure things out myself.

P.P.S. I do actually have a life - 5 kids, 2 grandchildren and am headed out to Utah tomorrow for a week of whitewater kayaking on the Yampa River. I honestly hope you have an equally great life! :-)

NVIDIA provides these different views, but either way the basic concepts work the same. If you want to run the posted code, set your paths to the CUDA toolset include and library directories. It could be ported to the Video SDK headers; you could do that. CUDA itself seems somehow more fundamental to me.

Send me an email at donald.graft at cantab.net and I will send you what you need. NALU parsing is an additional complication beyond the subject of this thread.

The Video SDK does not provide any CUDA libraries. You have to link cuda.lib and nvcuvid.lib from the CUDA toolset.

Probably you have Visual Studio configured to treat warnings as errors. You would have gotten a warning that the array was declared but not used. You can revise it and move on. As I said, it is inconsequential.

hello, electrodynamics:

I want to use NVDecoder to decode an H264 stream frame by frame. I use ffmpeg to read the H264 frames, in place of a video source reading data from an H264 file, but there is a question:
The first time I pass frame data to cuvidParseVideoData, the HandlePictureDecode callback is not called. The second time I pass a frame, HandlePictureDecode is executed. So I suspect that NvDecoder only decodes the first frame on the second call to cuvidParseVideoData; there is a one-frame decode delay. But I want to decode the current frame synchronously. How can I do that?
Thanks for replying.