(host) 3 threads decoding real-time RTSP, 1 thread decoding a file

Hello,
my problem is this:
(host) 3 threads are decoding real-time RTSP streams and 1 thread is decoding a video file, all using the sample class "NvDecoder". At the end of the video file I destroy the video decoder (cuvidDestroyDecoder) and create it again (cuvidCreateVideoParser).
After this, decoder utilization stays at 0% (nvidia-smi -l 1 -q | grep Decoder) and the decoded pictures come out as solid green.

My OS is Ubuntu 16.04, driver version 396.18, CUDA 9.2. I have tried a 1080 Ti and a 1070 Ti with the same versions; both show the same problem.
But when I change the driver to 430.34 and CUDA to 10.1, and download the newest Video Codec SDK, the problem is fixed.
What I want to know is: is this a bug in the Video Codec SDK / driver 396.18?
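
To make the scenario clearer, here is roughly what the file-decoding thread does on every pass. This is only a simplified sketch: the function name is a placeholder, H.264 is assumed for the file, and error handling is omitted. The full test program follows below.

extern "C" {
#include <libavformat/avformat.h>
}
#include <mutex>
#include "nvidia/NvDecoder.h"   // NvDecoder sample class from the Video Codec SDK

// decodeFileLoop is just an illustrative name for what my file thread does.
void decodeFileLoop(CUcontext cuCtx, std::mutex *ctxMtx,
                    const char *path, int width, int height) {
    for (;;) {
        AVFormatContext *fmt = nullptr;
        avformat_open_input(&fmt, path, nullptr, nullptr);
        avformat_find_stream_info(fmt, nullptr);
        int vs = av_find_best_stream(fmt, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);

        // Constructing NvDecoder creates the cuvid parser; the decoder itself
        // is created inside NvDecoder on the first sequence callback.
        NvDecoder dec(cuCtx, width, height, true, cudaVideoCodec_H264, ctxMtx,
                      false, false, nullptr, nullptr);

        AVPacket pkt;
        uint8_t **ppFrame = nullptr;
        int nFrames = 0;
        while (av_read_frame(fmt, &pkt) >= 0) {
            if (pkt.stream_index == vs)
                dec.Decode(pkt.data, pkt.size, &ppFrame, &nFrames);
            av_packet_unref(&pkt);
        }
        dec.Decode(nullptr, 0, &ppFrame, &nFrames);   // flush at end of file
        avformat_close_input(&fmt);

        // dec goes out of scope here, so cuvidDestroyDecoder /
        // cuvidDestroyVideoParser run; the next pass recreates everything
        // while the 3 RTSP threads keep decoding on the same CUcontext.
        // On 396.18 / CUDA 9.2 this is the point where NVDEC usage drops to
        // 0% and the recreated decoder only produces green frames.
    }
}

The three RTSP threads run the same kind of loop but never reach end of file, so only the file thread ever destroys and recreates its decoder.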

extern "C" {
#include <stdio.h>

#include <libavformat/avformat.h>
#include <libavutil/pixdesc.h>
#include <libavutil/opt.h>
#include <libavutil/avassert.h>
#include <libavutil/imgutils.h>

#include "libavutil/log.h"
}

#include "common/Image.h"
//#include "common/Logger.h"
// standard library, pthread and OpenCV headers used below
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <functional>
#include <string>
#include <vector>
#include <pthread.h>
#include <opencv2/opencv.hpp>

#include <cuda.h>
#include “nvidia/NvDecoder.h”
#include “nvidia/Utils/NvCodecUtils.h”
#include <X11/Xlib.h>

simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger(TRACE);

extern int cvtColor(unsigned char *d_req, unsigned char *d_res, int resolution,
int height, int width,
int linesize);

namespace test {

class RwLock {
public:
enum STATUS {READ_LOCK,WRITE_LOCK};
explicit RwLock(pthread_rwlock_t* rl,const STATUS st):spRwLock(rl,std::bind(&RwLock::unlock, this)) {
if(nullptr!=spRwLock) {
if(READ_LOCK==st) {
pthread_rwlock_rdlock(spRwLock.get());
} else {
pthread_rwlock_wrlock(spRwLock.get());
}
}

}

private:
std::shared_ptr<pthread_rwlock_t> spRwLock = nullptr;
private:
RwLock(const RwLock&) = delete;
RwLock& operator=(const RwLock&) = delete;
void unlock() {
if(nullptr!=spRwLock) {
pthread_rwlock_unlock(spRwLock.get());
}
}
};

class GpuDevice {
public:
GpuDevice() {
}
~GpuDevice() {}

int init(int gpuNo) {
    cuDev = std::make_shared<CUdevice>();
    cuCtx = std::make_shared<CUcontext>();

    ck(cuInit(0));
    int nGpu = 0;
    ck(cuDeviceGetCount(&nGpu));
    if (gpuNo < 0 || gpuNo >= nGpu) {
        std::cout << "GPU ordinal out of range. Should be within [" << 0 << ",  " << nGpu - 1 << "]" << std::endl;
    }

    ck(cuDeviceGet(cuDev.get(), gpuNo));
    char szDeviceName[80];
    ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), *cuDev.get()));
    std::cout << "GPU in use: " << szDeviceName << std::endl;

    ck(cuCtxCreate(cuCtx.get(), 0, *cuDev.get()));

    mtx = std::make_shared<std::mutex>();
    return 0;
}

CUdevice* getDevice() {
    return cuDev.get();
}
CUcontext* getContext() {
    return cuCtx.get();
}
std::mutex* getMtx() {
    return mtx.get();
}

int destroy() {
    ck(cuCtxDestroy(*cuCtx.get()));
    return 0;
}

private:
std::shared_ptr<CUdevice> cuDev = nullptr;
std::shared_ptr<CUcontext> cuCtx = nullptr;
std::shared_ptr<std::mutex> mtx = nullptr;

};

class NvidaDecode {
public:
NvidaDecode() {}
~NvidaDecode() {}
int init(CUcontext* context,const int w,const int h,AVCodecID id,std::mutex* mtx,const int channel) {

    width = w;
    height = h;
    channelNo = channel;
    decoder = std::make_shared<NvDecoder>(*context, w, h, true,
                                          FFmpeg2NvCodecId(id), mtx,
                                          false, false, nullptr, nullptr);
    return 0;
}
int decode(uint8_t* inputData,const int inputSize) {
    uint8_t **ppFrame = nullptr;
    int pictureCount = 0;
    decoder->Decode(inputData,inputSize,&ppFrame,&pictureCount);

    for(int i=0; i<pictureCount; i++) {
        std::shared_ptr<ivs::Image> imageOrigin =
            std::make_shared<ivs::Image>(height,width, 3, GPU_DEVICE(0));
        cvtColor(ppFrame[i], static_cast<uint8_t *>(imageOrigin->mData),
                 height * width, height,
                 width, width);

        //if(2==channelNo)
        {

            std::shared_ptr<ivs::Image> imgCpu = std::make_shared<ivs::Image>(
                    imageOrigin->mHeight,
                    imageOrigin->mWidth,
                    imageOrigin->mChannels,
                    DEVICE_TYPE_CPU);

            std::string strerror;
            ivs::gpuToCpu(reinterpret_cast<CUdeviceptr>(imageOrigin->mData),
                          (uint8_t *)imgCpu->mData,
                          imageOrigin->mWidth, 3 * imageOrigin->mHeight,strerror);

            cv::Mat cpumat(imgCpu->mHeight,
                           imgCpu->mWidth,CV_8UC3,
                           (unsigned char *)imgCpu->mData);

            cv::namedWindow(std::to_string(channelNo), CV_WINDOW_NORMAL);
            cv::imshow(std::to_string(channelNo),cpumat);
            cv::waitKey(1);

// char szPath[256] = {0};
// static int pathcount0 = 0;
// pathcount0++;
// sprintf(szPath, "/home/xzl/image_to_delete/%d.jpg", pathcount0);
// std::string path(szPath);
// cv::imwrite(path, cpumat);
}

    }


    return 0;
}
int uninit() {
    return 0;
}

private:

int channelNo = 0;

int width = 0;
int height = 0;

std::shared_ptr<NvDecoder> decoder = nullptr;

private:
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO :
return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO :
return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4 :
return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_VC1 :
return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264 :
return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC :
return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8 :
return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9 :
return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG :
return cudaVideoCodec_JPEG;
default :
return cudaVideoCodec_NumCodecs;
}
}
};

class decode {
public:
decode() {}
~decode() {
// avcodec_free_context(&decoder_ctx);
// avformat_close_input(&input_ctx);
}
int destroy() {
//RwLock writeLock(rwLock,RwLock::WRITE_LOCK);
//avcodec_free_context(&decoder_ctx);
avformat_close_input(&input_ctx);
return 0;
}

int init(const char *url,pthread_rwlock_t* rw,test::GpuDevice* gd,const int channelNo) {
    gpuDevice = gd;
    rwLock = rw;
    AVDictionary *dict = nullptr;
    //默认为udp传输,会出现花屏现象,因此设置为tcp传输,解决花屏问题。
    av_dict_set(&dict, "rtsp_transport", "tcp", 0);

    input_ctx = avformat_alloc_context();
    if (nullptr == input_ctx) {
        fprintf(stderr, "input context alloced failed.");
        return -1;
    }
    input_ctx->flags |= AVFMT_FLAG_NONBLOCK;

    /* open the input file */
    if (avformat_open_input(&input_ctx, url, NULL, &dict) != 0) {
        fprintf(stderr, "Cannot open input file '%s'\n", url);
        return -1;
    }

    if (avformat_find_stream_info(input_ctx, NULL) < 0) {
        fprintf(stderr, "Cannot find input stream information.\n");
        return -1;
    }

    /* find the video stream information */
    int ret = av_find_best_stream(input_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
    if (ret < 0) {
        fprintf(stderr, "Cannot find a video stream in the input file\n");
        return -1;
    }
    video_stream = ret;

    video = input_ctx->streams[video_stream];

    mWidth = video->codecpar->width;
    mHeight = video->codecpar->height;

    nvidaDecoder.init(gpuDevice->getContext(),mWidth,mHeight,decoder->id,gpuDevice->getMtx(),channelNo);

    return ret;
}

int decodeFrame(FILE *output_file,bool save) {
    //RwLock readLock(rwLock,RwLock::READ_LOCK);
    AVPacket packet;
    int ret = 0;
    if ((ret = av_read_frame(input_ctx, &packet)) < 0) {
        if(ret == AVERROR_EOF) {
            fprintf(stderr, "av_read_frame last frame:%d\n",ret);
            nvidaDecoder.decode(nullptr,0);

            // packet.data = NULL;
            // packet.size = 0;
            // ret = decode_write(decoder_ctx, &packet,imageOrigin);
            // av_packet_unref(&packet);
            return ret;
        } else {
            fprintf(stderr, "Failed av_read_frame\n");
        }
        return -1;
    }

    if (video_stream == packet.stream_index) {
        //if(save) usleep(40000);

        // if (save) {
        //     if ((ret = fwrite(packet.data, 1, packet.size, output_file)) < 0) {
        //         fprintf(stderr, "Failed to dump raw data.\n");
        //     }
        // }
        nvidaDecoder.decode(packet.data, packet.size);
        //ret = decode_write(decoder_ctx, &packet,imageOrigin);
    }

    av_packet_unref(&packet);
    return 0;
}

private:
AVFormatContext *input_ctx = nullptr;
AVCodec *decoder = nullptr;
AVCodecContext *decoder_ctx = nullptr;
AVStream *video = nullptr;
pthread_rwlock_t *rwLock = nullptr;
test::GpuDevice *gpuDevice = nullptr;
NvidaDecode nvidaDecoder;
int video_stream = 0;

int mWidth = 0;
int mHeight = 0;

};
}

class Task {
public:
void run(void* pArg) {
Task* pTask = static_cast<Task*>(pArg);
if (pTask == nullptr) {
    printf("thread params is nullptr!");
    return;
}
while(true) {

        test::decode decode;
        //decode.init("/home/xzl/3.avi",&device);
        decode.init(pTask->url.c_str(),pTask->rwLock,pTask->gpuDevice,pTask->channelNo);
        int ret = 0;

// char szPath[256] = {0};
// static int pathcount0 = 0;
// pathcount0++;
// sprintf(szPath, "/home/xzl/image_to_delete/%d.mp4", pathcount0);
// FILE *output_file = fopen(szPath, "w+");

        CUresult curet = cuCtxSetCurrent(*pTask->gpuDevice->getContext());
        //CUresult curet = cuCtxPushCurrent(*pTask->gpuDevice->getContext());
        if(curet!=CUDA_SUCCESS) {
            printf("cuCtxSetCurrent Failed! code:%d",curet);
        }

        /* actual decoding and dump the raw data */
        while (ret >= 0) {

            ret = decode.decodeFrame(nullptr,(0!=channelNo
                                              &&1!=channelNo
                                              &&2!=channelNo));
        }
        decode.destroy();
        //device.destroy();

// if (output_file)
// fclose(output_file);

// curet = cuCtxPopCurrent(pTask->gpuDevice->getContext());
// if(curet!=CUDA_SUCCESS) {
// IVS_ERROR("cuCtxPopCurrent Failed! code:{0}",curet);
// }

    }
}

void setChannelNo(const int channelNo) {
    this->channelNo = channelNo;
}
void setUrl(const std::string& url) {
    this->url = url;
}
void setRwLock(pthread_rwlock_t* rw) {
    rwLock = rw;
}

void setGpuDevice(test::GpuDevice* gD) {
    gpuDevice = gD;
}

private:
int channelNo = 0;
std::string url;
pthread_rwlock_t* rwLock = nullptr;
test::GpuDevice* gpuDevice = nullptr;
};

void make () {}

void makeTask(const int channelNumber,test::GpuDevice* gpudevice,pthread_rwlock_t* rwLock) {
std::vector<std::shared_ptr<Task>> task(channelNumber);
std::vector<std::shared_ptr<std::thread>> spThread(channelNumber);
for(int i=0; i<channelNumber; i++) {
    task[i] = std::make_shared<Task>();
    char szPath[256] = {0};
    sprintf(szPath, "rtsp://admin:lx123456@192.168.17.%d:554/Streaming/Channels/101?transportmode=unicast", i+1);

    task[i]->setUrl(szPath);
    task[i]->setChannelNo(i);
    task[i]->setRwLock(rwLock);
    task[i]->setGpuDevice(gpudevice);

    spThread[i] = std::make_shared<std::thread>(std::bind(&Task::run, task[i].get(),task[i].get()));
}

Task taskFile;
taskFile.setUrl("../test/1.avi");
taskFile.setChannelNo(3);
taskFile.setRwLock(rwLock);
taskFile.setGpuDevice(gpudevice);

std::shared_ptr<std::thread> spThread1 = std::make_shared<std::thread>(std::bind(&Task::run, &taskFile,&taskFile));

spThread1->join();

for(int i=0; i<channelNumber; i++) {
    spThread[i]->join();
}

}

#include <errno.h>
int main(int argc, char *argv[]) {

XInitThreads();

av_log_set_level(AV_LOG_ERROR);

test::GpuDevice gpudevice;
gpudevice.init(0);

makeTask(3,&gpudevice,nullptr);


return 0;

}

Sorry for my poor English; the full code is above.