Dear
We are decoding lossy WebP (VP8) using cuvidDecodePicture()
and related APIs on an L4 server.
The program initially decodes without any issues, but after thousands of uses, it starts producing output with strange colors. Since the issue is reproducible after prolonged use, we suspect a memory leak. However, even when the issue occurs, the server has plenty of available memory, and our code review did not reveal any obvious problems.
Once the issue occurs, it persists until the program is restarted. Restarting the program temporarily resolves the issue.
We have attached sample code and the image produced when the issue occurs. We would appreciate any advice on potential causes of this problem.
(Issue Occurred)
(Example Code)
// Wrapper around an NVDEC (cuvid) video parser and decoder.
//
// create() retains the primary context of device 0, creates a context lock
// and a video parser; the decoder itself is created from the parser's
// sequence callback (handleVideoSequence). The class declares no destructor,
// so the handles below are released only when destroy() is called.
class NvBaseDec {
public:
    // Retains the primary context and creates the context lock and the parser.
    void create();
    // Destroys the decoder and the parser, then creates a fresh parser.
    void resetCuvidVideoParser();
    // Releases every handle owned by this object.
    void destroy();
    // Destroys only the decoder handle (if any).
    void destroyCudaDecoder();
    // Destroys decoder, parser and context lock, then releases the primary context.
    void destroyCudaStructs();

    // Parser callbacks. `userData` is the NvBaseDec instance registered in
    // setDefaultParserParameters(). They return 0 on failure.
    static int handleVideoSequence(void *userData, CUVIDEOFORMAT *videoFormat);
    static int handleDecodePicture(void *userData, CUVIDPICPARAMS *picParams);
    static int handleDisplayPicture(void *userData, CUVIDPARSERDISPINFO *dispInfo);

    // Decoder helpers used by the sequence callback.
    static CUresult createCuDecoder(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat, const uint32_t &decWidth, const uint32_t &decHeight);
    static CUresult reconfigCuDecoder(NvBaseDec *nvBaseDec, CUVIDEOFORMAT *videoFormat, const uint32_t &decWidth, const uint32_t &decHeight);
    static cudaVideoSurfaceFormat matchNvDecOutputFormat(cudaVideoChromaFormat chromaFormat, unsigned char lumabitDepth);
    static CUresult checkOuputFormatFromCaps(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat);

    // Fills `videoParserParameters` with the settings used for every parser this class creates.
    void setDefaultParserParameters(CUVIDPARSERPARAMS &videoParserParameters);

private:
    CUcontext cuContext = nullptr;          // primary context retained in create()
    CUdevice cuDevice = 0;                  // initialised so it is never read indeterminate; set by cuDeviceGet in create()
    CUvideodecoder cuDecoder = nullptr;     // created lazily by handleVideoSequence
    CUvideoparser cuParser = nullptr;
    CUvideoctxlock ctxLock = nullptr;
    CUVIDDECODECREATEINFO decoderInfo = {0};  // parameters of the current decoder
    uint32_t numOfBufferSurface = 1;          // passed as ulNumOutputSurfaces
    uint32_t imageWidth = 0;
    uint32_t imageHeight = 0;
};
void NvBaseDec::create() {
CUresult cuRet = cuDeviceGet(&this->cuDevice, 0);
if (cuRet != CUDA_SUCCESS) {
CHECK_CURESULT_STRING(cuRet, "Failed to cuDeviceGet");
}
cuRet = cuDevicePrimaryCtxRetain(&this->cuContext, this->cuDevice);
if (cuRet != CUDA_SUCCESS) {
CHECK_CURESULT_STRING(cuRet, "Failed to cuDevicePrimaryCtxRetain");
}
cuRet = cuvidCtxLockCreate(&this->ctxLock, this->cuContext);
if (cuRet != CUDA_SUCCESS) {
this->destroyCudaStructs();
CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCtxLockCreate");
}
CUVIDPARSERPARAMS videoParserParameters;
this->setDefaultParserParameters(videoParserParameters);
cuRet = cuvidCreateVideoParser(&this->cuParser, &videoParserParameters);
if (cuRet != CUDA_SUCCESS) {
this->destroyCudaStructs();
CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCreateVideoParser");
}
}
void NvBaseDec::resetCuvidVideoParser() {
this->destroyCudaDecoder();
const char *errName = nullptr;
auto cuRet = cuvidDestroyVideoParser(this->cuParser);
if (this->cuParser && cuRet != CUDA_SUCCESS) {
cuGetErrorName(cuRet, &errName);
std::cerr << "warning : cuvidDestroyVideoParser fail : " << errName << std::endl;
}
this->cuParser = nullptr;
CUVIDPARSERPARAMS videoParserParameters;
this->setDefaultParserParameters(videoParserParameters);
cuRet = cuvidCreateVideoParser(&this->cuParser, &videoParserParameters);
if (cuRet != CUDA_SUCCESS) {
this->destroyCudaStructs();
CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCreateVideoParser");
}
}
void NvBaseDec::destroy() {
this->destroyCudaStructs();
}
void NvBaseDec::destroyCudaDecoder() {
CUresult cuRet = CUDA_SUCCESS;
cuCtxPushCurrent(this->cuContext);
cuCtxPopCurrent(nullptr);
if (this->cuDecoder && (cuRet = cuvidDestroyDecoder(this->cuDecoder)) != CUDA_SUCCESS) {
const char *errName = nullptr;
cuGetErrorName(cuRet, &errName);
std::cerr << "warning : cuvidDestroyVideoParser fail : " << errName << std::endl;
}
this->cuDecoder = nullptr;
}
// Releases every handle owned by this object in reverse order of creation:
// decoder, parser, context lock, then the retained primary context.
// Each step is best-effort: failures are logged and teardown continues.
// All handles are reset to nullptr, so calling this again is a no-op.
void NvBaseDec::destroyCudaStructs() {
    const char *errName = nullptr;
    CUresult cuRet = CUDA_SUCCESS;

    this->destroyCudaDecoder();

    if (this->cuParser && (cuRet = cuvidDestroyVideoParser(this->cuParser)) != CUDA_SUCCESS) {
        errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        // errName can remain null for unknown codes; never stream a null pointer.
        std::cerr << "warning : cuvidDestroyVideoParser fail : " << (errName ? errName : "unknown") << std::endl;
    }
    this->cuParser = nullptr;

    if (this->ctxLock && (cuRet = cuvidCtxLockDestroy(this->ctxLock)) != CUDA_SUCCESS) {
        errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        std::cerr << "NvBaseDec::cuvidCtxLockDestroy fail : " << (errName ? errName : "unknown") << std::endl;
    }
    this->ctxLock = nullptr;

    if (this->cuContext) {
        // Release the primary context on the device it was retained on in
        // create(), rather than on a hard-coded device handle.
        cuRet = cuDevicePrimaryCtxRelease(this->cuDevice);
        if (CUDA_SUCCESS != cuRet) {
            errName = nullptr;
            cuGetErrorName(cuRet, &errName);
            std::cerr << "warning : cuDevicePrimaryCtxRelease fail : " << (errName ? errName : "unknown") << std::endl;
        }
        this->cuContext = nullptr;
    }
}
int NvBaseDec::handleVideoSequence(void *userData, CUVIDEOFORMAT *videoFormat) {
if (userData == nullptr) {
std::cerr << "handleVideoSequence userData is nullptr" << std::endl;
return 0;
}
NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);
uint64_t area = static_cast<uint64_t>(videoFormat->coded_width) * static_cast<uint64_t>(videoFormat->coded_height);
uint64_t maxArea = static_cast<uint64_t>(NV_MAX_WIDTH) * static_cast<uint64_t>(NV_MAX_HEIGHT);
if (area > maxArea) {
CHECK_CALLBACK_ERROR(1, "handleVideoSequence fail, frame resolution[%d x %d] > nv max resolution[%d x %d]",
videoFormat->coded_width, videoFormat->coded_height, NV_MAX_WIDTH, NV_MAX_HEIGHT);
return 0;
}
CUresult cuRet = CUDA_SUCCESS;
const char *errName = nullptr;
cuGetErrorName(cuRet, &errName);
if (!nvBaseDec->cuDecoder ||
nvBaseDec->decoderInfo.ChromaFormat != videoFormat->chroma_format ||
nvBaseDec->decoderInfo.bitDepthMinus8 != videoFormat->bit_depth_luma_minus8 ||
nvBaseDec->decoderInfo.ulWidth < videoFormat->coded_width ||
nvBaseDec->decoderInfo.ulHeight < videoFormat->coded_height) {
if (nvBaseDec->cuDecoder && (cuRet = cuvidDestroyDecoder(nvBaseDec->cuDecoder)) != CUDA_SUCCESS) {
cuGetErrorName(cuRet, &errName);
CHECK_CALLBACK_ERROR(1, "Failed to cuvidDestroyDecoder : %s", errName);
return 0;
}
nvBaseDec->cuDecoder = nullptr;
auto matchedOutputFormat = matchNvDecOutputFormat(videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
cuRet = checkOuputFormatFromCaps(nvBaseDec, videoFormat, matchedOutputFormat);
if (cuRet != CUDA_SUCCESS) {
cuGetErrorName(cuRet, &errName);
CHECK_CALLBACK_ERROR(1, "Failed at checkOuputFormatFromCaps by: %s", errName);
return 0;
}
cuRet = createCuDecoder(nvBaseDec, videoFormat, matchedOutputFormat, videoFormat->coded_width, videoFormat->coded_height);
if (cuRet != CUDA_SUCCESS) {
cuGetErrorName(cuRet, &errName);
CHECK_CALLBACK_ERROR(1, "Failed to createCuDecoder : %s", errName);
return 0;
}
} else if (nvBaseDec->decoderInfo.ulNumDecodeSurfaces < videoFormat->min_num_decode_surfaces) {
cuRet = reconfigCuDecoder(nvBaseDec, videoFormat, videoFormat->coded_width, videoFormat->coded_height);
if (cuRet != CUDA_SUCCESS) {
cuGetErrorName(cuRet, &errName);
CHECK_CALLBACK_ERROR(1, "Failed to reconfigCuDecoder : %s", errName);
return 0;
}
}
return videoFormat->min_num_decode_surfaces;
}
int NvBaseDec::handleDecodePicture(void *userData, CUVIDPICPARAMS *picParams) {
NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);
cuCtxPushCurrent(nvBaseDec->cuContext);
auto cuRet = cuvidDecodePicture(nvBaseDec->cuDecoder, picParams);
if (cuRet != CUDA_SUCCESS) {
cuCtxPopCurrent(nullptr);
const char* errStr;
cuGetErrorString(cuRet, &errStr);
CHECK_CALLBACK_ERROR(1, "cuvidDecodePicture failed :: %d, %s", cuRet, errStr);
return 0;
}
cuCtxPopCurrent(nullptr);
return 1;
}
int NvBaseDec::handleDisplayPicture(void *userData, CUVIDPARSERDISPINFO *dispInfo) {
NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);
CUVIDPROCPARAMS videoProcessingParameters = {};
videoProcessingParameters.progressive_frame = dispInfo->progressive_frame;
videoProcessingParameters.second_field = dispInfo->repeat_first_field + 1;
videoProcessingParameters.top_field_first = dispInfo->top_field_first;
videoProcessingParameters.unpaired_field = dispInfo->repeat_first_field < 0;
cuCtxPushCurrent(nvBaseDec->cuContext);
CUdeviceptr frameAddr;
unsigned int framePitch;
auto cuRet = cuvidMapVideoFrame(nvBaseDec->cuDecoder, dispInfo->picture_index, &frameAddr, &framePitch, &videoProcessingParameters);
if (cuRet != CUDA_SUCCESS) {
cuCtxPopCurrent(nullptr);
auto cudaErrStr = getCudaErrorString(cuRet);
std::cerr << "cuvidMapVideoFrame fail: " << cudaErrStr << std::endl;
CHECK_CALLBACK_ERROR(1, "cuvidMapVideoFrame has been failed for %d, %s", cuRet, cudaErrStr.c_str());
return 0;
}
cuCtxPopCurrent(nullptr);
return 1;
}
// Queries the decoder capabilities for the stream's codec / chroma format /
// bit depth and verifies that the stream and `outputFormat` can be handled.
//
// Returns CUDA_SUCCESS when usable, the cuvidGetDecoderCaps error when the
// query fails, CUDA_ERROR_NOT_SUPPORTED when the device reports the
// combination or the resolution as unsupported, CUDA_ERROR_INVALID_IMAGE when
// no output format is offered at all, and CUDA_ERROR_NO_DEVICE when
// `outputFormat` is not among the offered formats.
CUresult NvBaseDec::checkOuputFormatFromCaps(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat) {
    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));
    decodecaps.eCodecType = videoFormat->codec;
    decodecaps.eChromaFormat = videoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = videoFormat->bit_depth_luma_minus8;

    cuCtxPushCurrent(nvBaseDec->cuContext);
    const CUresult cuRet = cuvidGetDecoderCaps(&decodecaps);
    cuCtxPopCurrent(NULL);
    if (CUDA_SUCCESS != cuRet) {
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        CHECK_CALLBACK_ERROR(1, "cuvidGetDecoderCaps cuResult = %s", errName ? errName : "unknown");
        return cuRet;
    }

    // The caps query succeeding does not mean the combination is decodable:
    // bIsSupported is the device's explicit answer.
    if (!decodecaps.bIsSupported) {
        CHECK_CALLBACK_ERROR(1, "The device does not support the corresponding VideoFormat : codec %d, chroma %d, bitDepth %d",
                             videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_NOT_SUPPORTED;
    }

    // Reject streams larger than the hardware limit before a decoder is created.
    if (videoFormat->coded_width > decodecaps.nMaxWidth || videoFormat->coded_height > decodecaps.nMaxHeight) {
        CHECK_CALLBACK_ERROR(1, "The device does not support the resolution [%u x %u], max supported [%u x %u]",
                             videoFormat->coded_width, videoFormat->coded_height, decodecaps.nMaxWidth, decodecaps.nMaxHeight);
        return CUDA_ERROR_NOT_SUPPORTED;
    }

    if (!(decodecaps.nOutputFormatMask)) {
        CHECK_CALLBACK_ERROR(1, "The device does not support the corresponding VideoFormat : codec %d, chroma %d, bitDepth %d",
                             videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_INVALID_IMAGE;
    } else if (!(decodecaps.nOutputFormatMask & (1 << outputFormat))) {
        // The mask has one bit per cudaVideoSurfaceFormat value.
        CHECK_CALLBACK_ERROR(1, "The device does not support that outputFormat. recommend using a different format : list %d, codec %d, chroma %d, bitDepth %d ",
                             decodecaps.nOutputFormatMask, videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_NO_DEVICE;
    }
    return CUDA_SUCCESS;
}
// Creates the decoder for `videoFormat` and stores it in nvBaseDec->cuDecoder.
//
// nvBaseDec    - instance that receives the decoder and whose context/lock are used.
// videoFormat  - stream format (codec, chroma format, bit depth, surface count).
// outputFormat - surface format of the mapped output frames.
// decWidth/decHeight - coded and target size of the decoder, in pixels.
// Returns the cuvidCreateDecoder result (or the cuCtxPushCurrent error).
// nvBaseDec->decoderInfo is updated only when the decoder was actually created.
CUresult NvBaseDec::createCuDecoder(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat, const uint32_t &decWidth, const uint32_t &decHeight) {
    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = videoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = videoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = outputFormat;
    videoDecodeCreateInfo.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
    videoDecodeCreateInfo.DeinterlaceMode = videoFormat->progressive_sequence ? cudaVideoDeinterlaceMode_Weave : cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
    videoDecodeCreateInfo.ulNumOutputSurfaces = nvBaseDec->numOfBufferSurface;
    videoDecodeCreateInfo.vidLock = nvBaseDec->ctxLock;
    videoDecodeCreateInfo.ulWidth = decWidth;
    videoDecodeCreateInfo.ulHeight = decHeight;
    // reconfigCuDecoder() may later call cuvidReconfigureDecoder on this
    // decoder; state the upper bound for that explicitly instead of leaving
    // it zero. (Per the NVDEC guide reconfiguration is limited by these
    // maxima - TODO confirm against the SDK version in use.)
    videoDecodeCreateInfo.ulMaxWidth = decWidth;
    videoDecodeCreateInfo.ulMaxHeight = decHeight;
    videoDecodeCreateInfo.ulTargetWidth = decWidth;
    videoDecodeCreateInfo.ulTargetHeight = decHeight;

    CUresult cuRet = cuCtxPushCurrent(nvBaseDec->cuContext);
    if (cuRet != CUDA_SUCCESS) {
        return cuRet;
    }
    cuRet = cuvidCreateDecoder(&nvBaseDec->cuDecoder, &videoDecodeCreateInfo);
    // Exactly one pop for the one push. The original popped twice on the
    // failure path, which removed a context this function never pushed.
    cuCtxPopCurrent(nullptr);

    if (cuRet != CUDA_SUCCESS) {
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        CHECK_CALLBACK_ERROR(1, "Failed cuvidCreateDecoder %s for [%d x %d]", errName ? errName : "unknown", decWidth, decHeight);
        // Make sure no stale or partial handle is mistaken for a live decoder.
        nvBaseDec->cuDecoder = nullptr;
        return cuRet;
    }

    // Remember the parameters only for a decoder that really exists.
    nvBaseDec->decoderInfo = videoDecodeCreateInfo;
    return cuRet;
}
// Reconfigures the existing decoder to the given size and to the number of
// decode surfaces requested by `videoFormat`. On success the cached
// decoderInfo is updated to match; on failure it is left untouched.
// Returns the result of cuvidReconfigureDecoder.
CUresult NvBaseDec::reconfigCuDecoder(NvBaseDec *nvBaseDec, CUVIDEOFORMAT *videoFormat, const uint32_t &decWidth, const uint32_t &decHeight) {
    CUVIDRECONFIGUREDECODERINFO reconfigInfo = {0};
    reconfigInfo.ulWidth = decWidth;
    reconfigInfo.ulTargetWidth = decWidth;
    reconfigInfo.ulHeight = decHeight;
    reconfigInfo.ulTargetHeight = decHeight;
    reconfigInfo.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;

    // The reconfigure call runs with this object's context current.
    cuCtxPushCurrent(nvBaseDec->cuContext);
    const CUresult result = cuvidReconfigureDecoder(nvBaseDec->cuDecoder, &reconfigInfo);
    cuCtxPopCurrent(nullptr);

    if (result != CUDA_SUCCESS) {
        return result;
    }

    // Mirror the new configuration in the cached create-info.
    CUVIDDECODECREATEINFO &cached = nvBaseDec->decoderInfo;
    cached.ulWidth = decWidth;
    cached.ulHeight = decHeight;
    cached.ulTargetWidth = decWidth;
    cached.ulTargetHeight = decHeight;
    cached.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
    return result;
}
// Picks the output surface format for a chroma format / bit depth pair.
// `lumabitDepth` is treated as a flag: any non-zero value selects the 16-bit
// variant for 4:2:0, monochrome and 4:4:4. 4:2:2 and any other chroma format
// map to NV12.
cudaVideoSurfaceFormat NvBaseDec::matchNvDecOutputFormat(cudaVideoChromaFormat chromaFormat, unsigned char lumabitDepth) {
    const bool useSixteenBit = (lumabitDepth != 0);
    switch (chromaFormat) {
        case cudaVideoChromaFormat_420:
        case cudaVideoChromaFormat_Monochrome:
            return useSixteenBit ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
        case cudaVideoChromaFormat_444:
            return useSixteenBit ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
        case cudaVideoChromaFormat_422:
        default:
            return cudaVideoSurfaceFormat_NV12;
    }
}
// Resets `videoParserParameters` to zero and fills in the settings shared by
// every parser this class creates: VP8 codec, one decode surface, no display
// delay, and this object's static callbacks with `this` as their user data.
void NvBaseDec::setDefaultParserParameters(CUVIDPARSERPARAMS &videoParserParameters) {
    memset(&videoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));

    CUVIDPARSERPARAMS &params = videoParserParameters;
    // Callbacks receive this instance back through pUserData.
    params.pUserData = reinterpret_cast<void *>(this);
    params.pfnSequenceCallback = NvBaseDec::handleVideoSequence;
    params.pfnDecodePicture = NvBaseDec::handleDecodePicture;
    params.pfnDisplayPicture = NvBaseDec::handleDisplayPicture;

    params.CodecType = cudaVideoCodec_VP8; // Example codec type
    params.ulMaxNumDecodeSurfaces = 1;
    params.ulMaxDisplayDelay = 0;
}
Thank you
BR
Shin