Hi, guys!
When I call the decode functions directly, the decoded results are correct, but when I compile the whole decoding process into a dynamic library, the decoded output is all zeros. I am using Video Codec SDK 8.0.14.
This is my init process.
typedef HMODULE CUDADRIVER;
CUDADRIVER hHandleDriver = 0;
cuInit(0, __CUDA_API_VERSION, hHandleDriver);
cuvidInit(0);
//init cuda
CUcontext cudaCtx;
CUdevice device;
cuDeviceGet(&device, 0);
cuCtxCreate(&cudaCtx, CU_CTX_SCHED_YIELD, device);
CUvideoctxlock ctxLock;
cuvidCtxLockCreate(&ctxLock, cudaCtx);
for (int i = 0; i < 8; i++)
{
pDecoder[i] = new CudaDecoder;
pFrameQueue[i] = new CUVIDFrameQueue(ctxLock);
pDecoder[i]->InitVideoDecoder(ctxLock, pFrameQueue[i]);
pFrameQueue[i]->init(pDecoder[i]->GetCUVIDEOFORMAT().coded_width, pDecoder[i]->GetCUVIDEOFORMAT().coded_height);
}
And this is how I feed data to the decoders and fetch the decoded results. In this scenario, I need to decode continuous frames from eight channels.
// Main decode loop. Each pass:
//   1. feeds exactly one compressed packet per channel to that channel's
//      decoder, and
//   2. for every channel with a frame ready, maps the decoded surface, copies
//      the NV12 data to host memory, then unmaps and releases the frame.
while (true)
{
    // ---- Stage 1: submit one packet per channel ----
    for (int i = 0; i < 8; i++)
    {
        int channel = md->mdi[i].channel;

        // Spin until this channel has input available.
        // NOTE(review): this is a busy-wait; a channel with no input stalls
        // all the other channels and burns a CPU core.
        while (decoder->q[channel]->empty())
        {
        }

        unsigned char *p = 0;
        decoder->q[channel]->pop(p);
        int frame_len = 0;
        decoder->len_q[channel]->pop(frame_len);

        // Zero-initialise so that no field is ever handed over indeterminate.
        CUVIDSOURCEDATAPACKET pBS = { 0 };
        pBS.payload = p;
        pBS.payload_size = frame_len;
        pBS.flags = CUVID_PKT_TIMESTAMP;
        pBS.timestamp = stamp[channel];
        stamp[channel]++;

        // NOTE(review): ownership of `p` after this call is not visible here;
        // confirm who frees the packet buffer.
        void *pUser = pDecoder[channel];
        pDecoder[channel]->HandleVideoData(pUser, &pBS);
    }

    // ---- Stage 2: fetch decoded frames ----
    for (int i = 0; i < 8; i++)
    {
        int channel = md->mdi[i].channel;

        if (pFrameQueue[channel]->isEndOfDecode() || pFrameQueue[channel]->isEmpty())
        {
            continue;
        }

        CUVIDPARSERDISPINFO pInfo;
        if (!pFrameQueue[channel]->dequeue(&pInfo))
        {
            continue;
        }

        CUVIDPROCPARAMS oVPP = { 0 };
        oVPP.progressive_frame = pInfo.progressive_frame;
        oVPP.second_field = 0;
        oVPP.top_field_first = pInfo.top_field_first;
        oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);

        // Take the context lock BEFORE the first CUDA/NVCUVID call. The
        // original mapped the frame outside the lock, which is unsafe when
        // this loop runs on a thread other than the one that created the
        // context.
        CCtxAutoLock lck(pDecoder[channel]->m_ctxLock);

        CUdeviceptr dMappedFrame = 0;
        unsigned int pitch = 0;
        CUresult oResult = cuvidMapVideoFrame(pDecoder[channel]->GetDecoder(), pInfo.picture_index,
                                              &dMappedFrame, &pitch, &oVPP);
        if (oResult != CUDA_SUCCESS)
        {
            // Nothing was mapped: report it and hand the picture back rather
            // than copying from a null device pointer with an unset pitch.
            printf("cuvidMapVideoFrame failed: channel %d error %d\n", channel, static_cast<int>(oResult));
            pFrameQueue[channel]->releaseFrame(&pInfo);
            continue;
        }

        // timestamp is a 64-bit CUvideotimestamp; "%d" was a format mismatch.
        printf("loop: %d channel: %d stamp:%lld pic_index:%d\n", i, channel,
               static_cast<long long>(pInfo.timestamp), pInfo.picture_index);

        // NV12 is 12 bits per pixel: a full-height luma plane plus a
        // half-height interleaved chroma plane, each row `pitch` bytes wide.
        const unsigned int codedHeight = pDecoder[channel]->GetCUVIDEOFORMAT().coded_height;
        const size_t nv12_size = static_cast<size_t>(pitch) * (codedHeight + codedHeight / 2);

        // Size the host buffer from the actual copy length. The original used
        // a fixed 1536*2048*3/2 buffer, which overflows whenever
        // pitch * height * 3/2 is larger than that.
        unsigned char *decodedSurface = static_cast<unsigned char *>(malloc(nv12_size));
        if (decodedSurface == 0)
        {
            printf("malloc of %llu bytes failed: channel %d\n",
                   static_cast<unsigned long long>(nv12_size), channel);
        }
        else
        {
            oResult = cuMemcpyDtoH(decodedSurface, dMappedFrame, nv12_size);
            if (oResult != CUDA_SUCCESS)
            {
                // Checked explicitly: assert() is compiled out under NDEBUG,
                // which would hide this failure in a release build.
                printf("cuMemcpyDtoH failed: channel %d error %d\n", channel, static_cast<int>(oResult));
            }
            assert(oResult == CUDA_SUCCESS);

            // Consume decodedSurface here (rows are `pitch` bytes apart),
            // before it is freed below.
        }

        // Release the mapped frame in GPU memory and return the picture slot.
        cuvidUnmapVideoFrame(pDecoder[channel]->GetDecoder(), dMappedFrame);
        pFrameQueue[channel]->releaseFrame(&pInfo);

        // The original leaked one buffer per pass; free(NULL) is a no-op.
        free(decodedSurface);
    }
}