Decode results are zero when the decoder is compiled into a dynamic library

Hi, guys!
When I call the decode functions directly, the decoded results are correct, but when I compile the whole decoding process into a dynamic library, the decoded results are zero. The Video Codec SDK version is 8.0.14.
This is my initialization code.

// One-time setup: load the CUDA / NVCUVID entry points, create a CUDA context on
// device 0, and build eight decoder + frame-queue pairs that share one context lock.

// Alias for the handle that is passed to cuInit below.
typedef HMODULE CUDADRIVER;
	CUDADRIVER hHandleDriver = 0;
	// NOTE(review): the three-argument cuInit and cuvidInit look like the dynamic-loading
	// wrappers shipped with the SDK samples rather than the plain driver API - confirm.
	// Their return values are ignored here, so a failed load would go unnoticed.
	cuInit(0, __CUDA_API_VERSION, hHandleDriver);
	cuvidInit(0); 

	// Create a CUDA context on device 0.
	// NOTE(review): per the CUDA driver API docs, cuCtxCreate makes the new context
	// current on the calling thread only; if decoding later runs on another thread,
	// verify the context is made current there - TODO confirm against the DLL's threading.
	CUcontext cudaCtx;
	CUdevice device;
	cuDeviceGet(&device, 0);
	cuCtxCreate(&cudaCtx, CU_CTX_SCHED_YIELD, device);

	// A single context lock built from cudaCtx; it is handed to every frame queue and decoder below.
	CUvideoctxlock ctxLock;
	cuvidCtxLockCreate(&ctxLock, cudaCtx);
	// One decoder and one frame queue per channel (eight channels).
        for (int i = 0; i < 8; i++)
	{
		pDecoder[i] = new CudaDecoder;
		pFrameQueue[i] = new CUVIDFrameQueue(ctxLock);
		pDecoder[i]->InitVideoDecoder(ctxLock, pFrameQueue[i]);
		// The frame queue is initialised with the coded dimensions reported by the decoder.
		pFrameQueue[i]->init(pDecoder[i]->GetCUVIDEOFORMAT().coded_width, pDecoder[i]->GetCUVIDEOFORMAT().coded_height);
	}

And this is how I feed data and fetch the decoded results. In my scenario, I need to decode continuous frames from eight channels.

// Main decode loop. Each pass has two stages:
//   1. feed exactly one compressed packet per channel to that channel's decoder;
//   2. for each channel, copy at most one decoded NV12 frame from the GPU to host memory.
while (true)
{
	// ---- Stage 1: submit one packet per channel ----
	for (int i = 0; i < 8; i++)
	{
		int channel = md->mdi[i].channel;

		CUVIDSOURCEDATAPACKET pBS = { 0 };

		// Busy-wait until this channel's input queue has a packet, then take it.
		while (true) {
			if (decoder->q[channel]->empty()) continue;

			unsigned char *p;
			decoder->q[channel]->pop(p);
			int frame_len;
			decoder->len_q[channel]->pop(frame_len);

			pBS.payload = p;
			pBS.payload_size = frame_len;
			pBS.flags = CUVID_PKT_TIMESTAMP;
			// Per-channel running counter used as the packet timestamp.
			pBS.timestamp = stamp[channel];
			stamp[channel]++;
			break;
		}

		void *pUser = pDecoder[channel];
		pDecoder[channel]->HandleVideoData(pUser, &pBS);
	}

	// ---- Stage 2: fetch one decoded frame per channel ----
	for (int i = 0; i < 8; i++)
	{
		int channel = md->mdi[i].channel;

		if (pFrameQueue[channel]->isEndOfDecode() || pFrameQueue[channel]->isEmpty())
			continue;

		CUVIDPARSERDISPINFO pInfo;
		if (!pFrameQueue[channel]->dequeue(&pInfo))
			continue;

		// Hold the context lock across map, copy and unmap, not just the copy.
		// NOTE(review): the SDK samples take this lock before cuvidMapVideoFrame - confirm
		// that CCtxAutoLock also makes the context current on this thread.
		CCtxAutoLock lck(pDecoder[channel]->m_ctxLock);

		CUdeviceptr dMappedFrame = 0;
		unsigned int pitch = 0;
		CUVIDPROCPARAMS oVPP = { 0 };
		oVPP.progressive_frame = pInfo.progressive_frame;
		oVPP.second_field = 0;
		oVPP.top_field_first = pInfo.top_field_first;
		oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);

		// A failed map leaves dMappedFrame/pitch unusable, so check it explicitly.
		CUresult oResult = cuvidMapVideoFrame(pDecoder[channel]->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);
		if (oResult != CUDA_SUCCESS) {
			fprintf(stderr, "channel %d: cuvidMapVideoFrame failed (CUresult %d)\n", channel, (int)oResult);
			pFrameQueue[channel]->releaseFrame(&pInfo);
			continue;
		}

		// The timestamp is a 64-bit value, so it is printed with %lld rather than %d.
		printf("loop: %d  channel: %d   stamp:%lld   pic_index:%d\n", i, channel, (long long)pInfo.timestamp, pInfo.picture_index);

		// NV12 is 12 bpp: a full-height luma plane followed by a half-height chroma plane.
		// Rows are `pitch` bytes apart, so the buffer is sized from the pitch that was
		// actually returned instead of a fixed 1536x2048 guess that the copy could overrun.
		const unsigned int coded_height = pDecoder[channel]->GetCUVIDEOFORMAT().coded_height;
		const size_t nv12_size = (size_t)pitch * (coded_height + coded_height / 2);

		unsigned char *decodedSurface = (unsigned char *)malloc(nv12_size);
		if (decodedSurface == NULL) {
			fprintf(stderr, "channel %d: out of memory allocating %llu bytes\n", channel, (unsigned long long)nv12_size);
		} else {
			// Report the failure at runtime; an assert would vanish in NDEBUG (release) builds.
			oResult = cuMemcpyDtoH(decodedSurface, dMappedFrame, nv12_size);
			if (oResult != CUDA_SUCCESS) {
				fprintf(stderr, "channel %d: cuMemcpyDtoH failed (CUresult %d)\n", channel, (int)oResult);
			}

			// ... consume decodedSurface here (rows are `pitch` bytes apart) ...

			// Free the per-frame buffer; if ownership is handed to a consumer above,
			// that consumer must free it instead.
			free(decodedSurface);
		}

		// Release the mapped GPU frame and return the picture slot to the queue.
		cuvidUnmapVideoFrame(pDecoder[channel]->GetDecoder(), dMappedFrame);
		pFrameQueue[channel]->releaseFrame(&pInfo);
	}
}

Hi,

  1. Which codec is this?
  2. It seems that you are able to do a standalone decoding successfully, this means the underlying driver is behaving as expected.
  3. Are you seeing failures in any of the NVDECODEAPIs in your application?

Thanks,
Ryan Park