cuvidDecodePicture() Decodes Lossy WebP with Incorrect Colors

Hello,

We are decoding lossy WebP (VP8) images using cuvidDecodePicture() and the related NVDEC (CUVID) APIs on a server equipped with an NVIDIA L4 GPU.

The program initially decodes without any issues, but after thousands of decode calls it starts producing output with strange colors. Since the issue only appears after prolonged use, we suspect a memory leak. However, even when the issue occurs, the server has plenty of available memory, and our code review did not reveal any obvious problems.

Once the issue occurs, it persists until the program is restarted. Restarting the program temporarily resolves the issue.

We have attached sample code and an image showing the output produced when the issue occurs. We would appreciate any advice on potential causes of this problem.

(Issue Occurred)

(Example Code)

// Owns one CUVID video parser and (lazily) one CUVID decoder on the primary
// context of CUDA device ordinal 0.
//
// Lifecycle as implemented by the member definitions in this file:
//   create()                -> device handle, primary context, context lock, parser
//   handleVideoSequence()   -> creates / reconfigures the decoder (parser callback)
//   handleDecodePicture()   -> submits one picture to the decoder (parser callback)
//   handleDisplayPicture()  -> maps the decoded picture (parser callback)
//   resetCuvidVideoParser() -> drops the decoder and replaces the parser
//   destroy()               -> releases everything create() acquired
class NvBaseDec {
public:
    // Acquires the device handle, primary context, context lock and parser.
    void create();
    // Destroys the decoder and swaps the parser for a freshly created one.
    void resetCuvidVideoParser();
    // Releases every CUDA/CUVID resource held by this object.
    void destroy();
    // Destroys only the decoder; the parser, lock and context are kept.
    void destroyCudaDecoder();
    // Destroys decoder, parser, context lock and releases the primary context.
    void destroyCudaStructs();

    // Parser callbacks. `userData` is the NvBaseDec instance registered in
    // setDefaultParserParameters(). Each returns 0 on failure.
    static int handleVideoSequence(void *userData, CUVIDEOFORMAT *videoFormat);
    static int handleDecodePicture(void *userData, CUVIDPICPARAMS *picParams);
    static int handleDisplayPicture(void *userData, CUVIDPARSERDISPINFO *dispInfo);

    // Decoder management helpers used by handleVideoSequence().
    static CUresult createCuDecoder(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat, const uint32_t &decWidth, const uint32_t &decHeight);
    static CUresult reconfigCuDecoder(NvBaseDec *nvBaseDec, CUVIDEOFORMAT *videoFormat, const uint32_t &decWidth, const uint32_t &decHeight);
    // Picks the output surface format for a chroma format. `lumabitDepth` is
    // only tested for zero / non-zero; callers pass bit_depth_luma_minus8.
    static cudaVideoSurfaceFormat matchNvDecOutputFormat(cudaVideoChromaFormat chromaFormat, unsigned char lumabitDepth);
    static CUresult checkOuputFormatFromCaps(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat);
    // Fills `videoParserParameters` with the VP8 parser configuration and the
    // three callbacks above.
    void setDefaultParserParameters(CUVIDPARSERPARAMS &videoParserParameters);

private:
    CUcontext cuContext = nullptr;           // primary context retained in create()
    // Initialized so the handle is never read indeterminate if cuDeviceGet()
    // fails; every other member already has a default.
    CUdevice cuDevice = 0;
    CUvideodecoder cuDecoder = nullptr;      // created on demand by handleVideoSequence()
    CUvideoparser cuParser = nullptr;
    CUvideoctxlock ctxLock = nullptr;        // passed to the decoder as vidLock
    CUVIDDECODECREATEINFO decoderInfo = {0}; // configuration of the current decoder
    uint32_t numOfBufferSurface = 1;         // used as ulNumOutputSurfaces
    uint32_t imageWidth = 0;                 // not referenced by the code shown here
    uint32_t imageHeight = 0;                // not referenced by the code shown here
};

void NvBaseDec::create() {
    CUresult cuRet = cuDeviceGet(&this->cuDevice, 0);
    if (cuRet != CUDA_SUCCESS) {
        CHECK_CURESULT_STRING(cuRet, "Failed to cuDeviceGet");
    }

    cuRet = cuDevicePrimaryCtxRetain(&this->cuContext, this->cuDevice);
    if (cuRet != CUDA_SUCCESS) {
        CHECK_CURESULT_STRING(cuRet, "Failed to cuDevicePrimaryCtxRetain");
    }

    cuRet = cuvidCtxLockCreate(&this->ctxLock, this->cuContext);
    if (cuRet != CUDA_SUCCESS) {
        this->destroyCudaStructs();
        CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCtxLockCreate");
    }

    CUVIDPARSERPARAMS videoParserParameters;
    this->setDefaultParserParameters(videoParserParameters);

    cuRet = cuvidCreateVideoParser(&this->cuParser, &videoParserParameters);
    if (cuRet != CUDA_SUCCESS) {
        this->destroyCudaStructs();
        CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCreateVideoParser");
    }
}

void NvBaseDec::resetCuvidVideoParser() {
    this->destroyCudaDecoder();

    const char *errName = nullptr;
    auto cuRet = cuvidDestroyVideoParser(this->cuParser);
    if (this->cuParser && cuRet != CUDA_SUCCESS) {
        cuGetErrorName(cuRet, &errName);
        std::cerr << "warning : cuvidDestroyVideoParser fail : " << errName << std::endl;
    }

    this->cuParser = nullptr;

    CUVIDPARSERPARAMS videoParserParameters;
    this->setDefaultParserParameters(videoParserParameters);

    cuRet = cuvidCreateVideoParser(&this->cuParser, &videoParserParameters);
    if (cuRet != CUDA_SUCCESS) {
        this->destroyCudaStructs();
        CHECK_CURESULT_STRING(cuRet, "Failed to cuvidCreateVideoParser");
    }
}

void NvBaseDec::destroy() {
    this->destroyCudaStructs();
}

void NvBaseDec::destroyCudaDecoder() {
    CUresult cuRet = CUDA_SUCCESS;

    cuCtxPushCurrent(this->cuContext);
    cuCtxPopCurrent(nullptr);

    if (this->cuDecoder && (cuRet = cuvidDestroyDecoder(this->cuDecoder)) != CUDA_SUCCESS) {
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        std::cerr << "warning : cuvidDestroyVideoParser fail : " << errName << std::endl;
    }

    this->cuDecoder = nullptr;
}

void NvBaseDec::destroyCudaStructs() {
    const char *errName = nullptr;
    CUresult cuRet = CUDA_SUCCESS;

    this->destroyCudaDecoder();

    if (this->cuParser && (cuRet = cuvidDestroyVideoParser(this->cuParser)) != CUDA_SUCCESS) {
        cuGetErrorName(cuRet, &errName);
        std::cerr << "warning : cuvidDestroyVideoParser fail : " << errName << std::endl;
    }
    this->cuParser = nullptr;

    if (this->ctxLock && (cuRet = cuvidCtxLockDestroy(this->ctxLock)) != CUDA_SUCCESS) {
        cuGetErrorName(cuRet, &errName);
        std::cerr << "NvBaseDec::cuvidCtxLockDestroy fail : " << errName << std::endl;
    }
    this->ctxLock = nullptr;

    if (this->cuContext) {
        cuRet = cuDevicePrimaryCtxRelease(0);
        if (CUDA_SUCCESS != cuRet) {
            cuGetErrorName(cuRet, &errName);
            std::cerr << "warning : cuDevicePrimaryCtxRelease fail : " << errName << std::endl;
        }
        this->cuContext = nullptr;
    }
}

int NvBaseDec::handleVideoSequence(void *userData, CUVIDEOFORMAT *videoFormat) {
    if (userData == nullptr) {
        std::cerr << "handleVideoSequence userData is nullptr" << std::endl;
        return 0;
    }
    NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);

    uint64_t area = static_cast<uint64_t>(videoFormat->coded_width) * static_cast<uint64_t>(videoFormat->coded_height);
    uint64_t maxArea = static_cast<uint64_t>(NV_MAX_WIDTH) * static_cast<uint64_t>(NV_MAX_HEIGHT);
    if (area > maxArea) {
        CHECK_CALLBACK_ERROR(1, "handleVideoSequence fail, frame resolution[%d x %d] > nv max resolution[%d x %d]",
                             videoFormat->coded_width, videoFormat->coded_height, NV_MAX_WIDTH, NV_MAX_HEIGHT);
        return 0;
    }

    CUresult cuRet = CUDA_SUCCESS;
    const char *errName = nullptr;
    cuGetErrorName(cuRet, &errName);
    if (!nvBaseDec->cuDecoder ||
        nvBaseDec->decoderInfo.ChromaFormat != videoFormat->chroma_format ||
        nvBaseDec->decoderInfo.bitDepthMinus8 != videoFormat->bit_depth_luma_minus8 ||
        nvBaseDec->decoderInfo.ulWidth < videoFormat->coded_width ||
        nvBaseDec->decoderInfo.ulHeight < videoFormat->coded_height) {
        if (nvBaseDec->cuDecoder && (cuRet = cuvidDestroyDecoder(nvBaseDec->cuDecoder)) != CUDA_SUCCESS) {
            cuGetErrorName(cuRet, &errName);
            CHECK_CALLBACK_ERROR(1, "Failed to cuvidDestroyDecoder : %s", errName);
            return 0;
        }
        nvBaseDec->cuDecoder = nullptr;

        auto matchedOutputFormat = matchNvDecOutputFormat(videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        cuRet = checkOuputFormatFromCaps(nvBaseDec, videoFormat, matchedOutputFormat);
        if (cuRet != CUDA_SUCCESS) {
            cuGetErrorName(cuRet, &errName);
            CHECK_CALLBACK_ERROR(1, "Failed at checkOuputFormatFromCaps by: %s", errName);
            return 0;
        }

        cuRet = createCuDecoder(nvBaseDec, videoFormat, matchedOutputFormat, videoFormat->coded_width, videoFormat->coded_height);
        if (cuRet != CUDA_SUCCESS) {
            cuGetErrorName(cuRet, &errName);
            CHECK_CALLBACK_ERROR(1, "Failed to createCuDecoder : %s", errName);
            return 0;
        }
    } else if (nvBaseDec->decoderInfo.ulNumDecodeSurfaces < videoFormat->min_num_decode_surfaces) {
        cuRet = reconfigCuDecoder(nvBaseDec, videoFormat, videoFormat->coded_width, videoFormat->coded_height);
        if (cuRet != CUDA_SUCCESS) {
            cuGetErrorName(cuRet, &errName);
            CHECK_CALLBACK_ERROR(1, "Failed to reconfigCuDecoder : %s", errName);
            return 0;
        }
    }

    return videoFormat->min_num_decode_surfaces;
}

int NvBaseDec::handleDecodePicture(void *userData, CUVIDPICPARAMS *picParams) {
    NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);

    cuCtxPushCurrent(nvBaseDec->cuContext);
    auto cuRet = cuvidDecodePicture(nvBaseDec->cuDecoder, picParams);
    if (cuRet != CUDA_SUCCESS) {
        cuCtxPopCurrent(nullptr);
        const char* errStr;
        cuGetErrorString(cuRet, &errStr);
        CHECK_CALLBACK_ERROR(1, "cuvidDecodePicture failed :: %d, %s", cuRet, errStr);
        return 0;
    }

    cuCtxPopCurrent(nullptr);

    return 1;
}

int NvBaseDec::handleDisplayPicture(void *userData, CUVIDPARSERDISPINFO *dispInfo) {
    NvBaseDec *nvBaseDec = reinterpret_cast<NvBaseDec *>(userData);

    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = dispInfo->progressive_frame;
    videoProcessingParameters.second_field = dispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = dispInfo->top_field_first;
    videoProcessingParameters.unpaired_field = dispInfo->repeat_first_field < 0;

    cuCtxPushCurrent(nvBaseDec->cuContext);
    CUdeviceptr frameAddr;
    unsigned int framePitch;
    auto cuRet = cuvidMapVideoFrame(nvBaseDec->cuDecoder, dispInfo->picture_index, &frameAddr, &framePitch, &videoProcessingParameters);
    if (cuRet != CUDA_SUCCESS) {
        cuCtxPopCurrent(nullptr);
        auto cudaErrStr = getCudaErrorString(cuRet);
        std::cerr << "cuvidMapVideoFrame fail: " << cudaErrStr << std::endl;
        CHECK_CALLBACK_ERROR(1, "cuvidMapVideoFrame has been failed for %d, %s", cuRet, cudaErrStr.c_str());
        return 0;
    }

    cuCtxPopCurrent(nullptr);

    return 1;
}

// Queries the decoder capabilities for the stream's codec / chroma format / bit
// depth and verifies that the stream and `outputFormat` can be handled.
//
// Returns:
//   CUDA_SUCCESS              - stream and output format are supported.
//   result of the failing API - context push or cuvidGetDecoderCaps() failed.
//   CUDA_ERROR_NOT_SUPPORTED  - the caps report the combination as unsupported,
//                               or the coded size exceeds the reported maximum
//                               (both checks are new).
//   CUDA_ERROR_INVALID_IMAGE  - no output format at all is offered.
//   CUDA_ERROR_NO_DEVICE      - `outputFormat` is not in the offered set.
CUresult NvBaseDec::checkOuputFormatFromCaps(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat) {
    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = videoFormat->codec;
    decodecaps.eChromaFormat = videoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = videoFormat->bit_depth_luma_minus8;

    CUresult cuRet = cuCtxPushCurrent(nvBaseDec->cuContext);
    if (CUDA_SUCCESS == cuRet) {
        cuRet = cuvidGetDecoderCaps(&decodecaps);
        // Pop exactly once, and only because the push succeeded.
        cuCtxPopCurrent(nullptr);
    }
    if (CUDA_SUCCESS != cuRet) {
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        CHECK_CALLBACK_ERROR(1, "cuvidGetDecoderCaps cuResult = %s", errName ? errName : "unknown");
        return cuRet;
    }

    if (!decodecaps.bIsSupported) {
        CHECK_CALLBACK_ERROR(1, "The device does not support the corresponding VideoFormat : codec %d, chroma %d, bitDepth %d",
                             videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_NOT_SUPPORTED;
    }

    if (videoFormat->coded_width > decodecaps.nMaxWidth || videoFormat->coded_height > decodecaps.nMaxHeight) {
        CHECK_CALLBACK_ERROR(1, "Frame resolution [%u x %u] exceeds the device maximum [%u x %u]",
                             videoFormat->coded_width, videoFormat->coded_height, decodecaps.nMaxWidth, decodecaps.nMaxHeight);
        return CUDA_ERROR_NOT_SUPPORTED;
    }

    if (!(decodecaps.nOutputFormatMask)) {
        CHECK_CALLBACK_ERROR(1, "The device does not support the corresponding VideoFormat : codec %d, chroma %d, bitDepth %d",
                             videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_INVALID_IMAGE;
    } else if (!(decodecaps.nOutputFormatMask & (1 << outputFormat))) {
        CHECK_CALLBACK_ERROR(1, "The device does not support that outputFormat. recommend using a different format : list %d, codec %d, chroma %d, bitDepth %d ",
                             decodecaps.nOutputFormatMask, videoFormat->codec, videoFormat->chroma_format, videoFormat->bit_depth_luma_minus8);
        return CUDA_ERROR_NO_DEVICE;
    }

    return CUDA_SUCCESS;
}

// Creates a decoder for `videoFormat` with `decWidth` x `decHeight` as coded,
// target and maximum size, and stores the handle in nvBaseDec->cuDecoder.
//
// nvBaseDec->decoderInfo is updated only when creation succeeds. On failure
// cuDecoder is left null and the error is returned.
//
// Changes from the previous version:
//   * The context is popped exactly once. The failure branch used to pop and
//     then fall through to a second pop, removing a context this function had
//     never pushed.
//   * decoderInfo is no longer overwritten with a configuration that was never
//     applied.
//   * ulMaxWidth / ulMaxHeight are set explicitly so later
//     cuvidReconfigureDecoder() calls have a defined upper bound.
CUresult NvBaseDec::createCuDecoder(NvBaseDec *nvBaseDec, const CUVIDEOFORMAT *videoFormat, const cudaVideoSurfaceFormat &outputFormat, const uint32_t &decWidth, const uint32_t &decHeight) {
    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = videoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = videoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = outputFormat;
    videoDecodeCreateInfo.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;

    videoDecodeCreateInfo.DeinterlaceMode = videoFormat->progressive_sequence ? cudaVideoDeinterlaceMode_Weave : cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;

    videoDecodeCreateInfo.ulNumOutputSurfaces = nvBaseDec->numOfBufferSurface;
    videoDecodeCreateInfo.vidLock = nvBaseDec->ctxLock;
    videoDecodeCreateInfo.ulWidth = decWidth;
    videoDecodeCreateInfo.ulHeight = decHeight;
    videoDecodeCreateInfo.ulMaxWidth = decWidth;
    videoDecodeCreateInfo.ulMaxHeight = decHeight;
    videoDecodeCreateInfo.ulTargetWidth = decWidth;
    videoDecodeCreateInfo.ulTargetHeight = decHeight;

    CUresult cuRet = cuCtxPushCurrent(nvBaseDec->cuContext);
    if (cuRet != CUDA_SUCCESS) {
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        CHECK_CALLBACK_ERROR(1, "Failed cuCtxPushCurrent %s for [%d x %d]", errName ? errName : "unknown", decWidth, decHeight);
        return cuRet;
    }

    cuRet = cuvidCreateDecoder(&nvBaseDec->cuDecoder, &videoDecodeCreateInfo);
    cuCtxPopCurrent(nullptr);

    if (cuRet != CUDA_SUCCESS) {
        // Make sure no stale or partially written handle survives a failure.
        nvBaseDec->cuDecoder = nullptr;
        const char *errName = nullptr;
        cuGetErrorName(cuRet, &errName);
        CHECK_CALLBACK_ERROR(1, "Failed cuvidCreateDecoder %s for [%d x %d]", errName ? errName : "unknown", decWidth, decHeight);
        return cuRet;
    }

    nvBaseDec->decoderInfo = videoDecodeCreateInfo;
    return cuRet;
}

// Reconfigures the existing decoder to `decWidth` x `decHeight` (used for both
// the coded and the target size) with videoFormat->min_num_decode_surfaces
// decode surfaces. The cached nvBaseDec->decoderInfo is brought in line with
// the new values only when cuvidReconfigureDecoder() succeeds.
// Returns the result of cuvidReconfigureDecoder().
CUresult NvBaseDec::reconfigCuDecoder(NvBaseDec *nvBaseDec, CUVIDEOFORMAT *videoFormat, const uint32_t &decWidth, const uint32_t &decHeight) {
    const auto surfaceCount = videoFormat->min_num_decode_surfaces;

    CUVIDRECONFIGUREDECODERINFO request = {0};
    request.ulNumDecodeSurfaces = surfaceCount;
    request.ulTargetHeight = decHeight;
    request.ulTargetWidth = decWidth;
    request.ulHeight = decHeight;
    request.ulWidth = decWidth;

    cuCtxPushCurrent(nvBaseDec->cuContext);
    const CUresult status = cuvidReconfigureDecoder(nvBaseDec->cuDecoder, &request);
    cuCtxPopCurrent(nullptr);

    if (status != CUDA_SUCCESS) {
        return status;
    }

    // Mirror the accepted configuration into the cached create-info.
    CUVIDDECODECREATEINFO &cached = nvBaseDec->decoderInfo;
    cached.ulNumDecodeSurfaces = surfaceCount;
    cached.ulTargetHeight = decHeight;
    cached.ulTargetWidth = decWidth;
    cached.ulHeight = decHeight;
    cached.ulWidth = decWidth;
    return status;
}

// Chooses the decoder output surface format for a chroma format.
// `lumabitDepth` is only tested for zero / non-zero: non-zero selects the
// 16-bit variant for 4:2:0, monochrome and 4:4:4 input. 4:2:2 and any other
// chroma format always map to NV12.
cudaVideoSurfaceFormat NvBaseDec::matchNvDecOutputFormat(cudaVideoChromaFormat chromaFormat, unsigned char lumabitDepth) {
    const bool wideSamples = lumabitDepth != 0;

    switch (chromaFormat) {
    case cudaVideoChromaFormat_420:
    case cudaVideoChromaFormat_Monochrome:
        return wideSamples ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
    case cudaVideoChromaFormat_444:
        return wideSamples ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
    case cudaVideoChromaFormat_422:
    default:
        return cudaVideoSurfaceFormat_NV12;
    }
}

// Resets `videoParserParameters` to all zeros and fills in the configuration
// used for every parser this class creates: VP8 codec, one decode surface, no
// display delay, `this` as the callback user data, and the three static
// callbacks of NvBaseDec.
void NvBaseDec::setDefaultParserParameters(CUVIDPARSERPARAMS &videoParserParameters) {
    memset(&videoParserParameters, 0, sizeof(videoParserParameters));

    // Callback wiring: each callback receives this object back as userData.
    videoParserParameters.pUserData = static_cast<void *>(this);
    videoParserParameters.pfnSequenceCallback = NvBaseDec::handleVideoSequence;
    videoParserParameters.pfnDecodePicture = NvBaseDec::handleDecodePicture;
    videoParserParameters.pfnDisplayPicture = NvBaseDec::handleDisplayPicture;

    // Stream configuration.
    videoParserParameters.CodecType = cudaVideoCodec_VP8; // Example codec type
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulMaxDisplayDelay = 0;
}

Thank you

BR
Shin

vp8_issue_files.zip (889.1 KB)

I found a way to reproduce this issue using original WEBP files. By creating a WEBP file with an image quality of 100 through Photoshop, this issue can be reproduced.

Additionally, I investigated some WEBP images randomly obtained through Google search, and found files where the issue could be reproduced.

By using a command like:

```shell
webpinfo -diag -summary -bitstream_info ../hskim_webp/cmyk_exif+xmp+pho+file_99.webp
```

to check with webpinfo, I found a commonality among the files where the issue was reproducible.

  • For files where the issue occurred, there was a value of 255 in the Prob segment field, as seen in:

```yaml
Prob segment: 255 46 255
```
  • Alternatively, if the Use segment field was 0, making the Prob segment field completely absent, the issue was also reproducible.

I’ve attached the original file where the issue can be reproduced. It would be helpful if you could perform a reproduction test.

Any sample code that uses the VP8 CUVID decoder should make it easy to reproduce the issue.