Issue running Driveworks DNN on custom model

Hi,

I am trying to run and validate our model using the DriveWorks DNN module. I have already validated the same model with TensorRT, and it works fine there.

With DriveWorks I am facing a few issues for which I need your support. Our model has 6 inputs and 11 outputs; for now I am trying to feed file-based input to validate the output.
My doubt is about how to pass the 6 inputs to the model: whenever I try, I get a channel-descriptor mismatch error during dwDataConditioner_prepareData.

Driveworks exception thrown: CudaTextureObjectCache::getOrCreateTextureObject. cudaErrorInvalidChannelDescriptor: invalid channel descriptor

or

Driveworks exception thrown: DW_INVALID_ARGUMENT: DataConditioner: Given tensor’s properties do not match the expected properties.

Initialize DNN:

    {
        // ------------------------------------------------------------------
        // DNN initialization: load a TensorRT-optimized model, allocate one
        // device tensor per network input/output, create host-side streamers
        // for the outputs, and set up the DataConditioner used to preprocess
        // input images.
        // ------------------------------------------------------------------

        // If not specified, load the correct network based on platform
        std::string tensorRTModel = getArgument("tensorRT_model");
        if (tensorRTModel.empty())
        {
            tensorRTModel = dw_samples::DataPath::get() + "/samples/detector/";
            tensorRTModel += getPlatformPrefix();
            tensorRTModel += "/model32.bin";
        }

        // Initialize DNN from a TensorRT file
        CHECK_DW_ERROR(dwDNN_initializeTensorRTFromFile(&m_dnn, tensorRTModel.c_str(), nullptr,
                                                        DW_PROCESSOR_TYPE_GPU, m_sdk));

        // All DNN work is issued on the application's CUDA stream.
        CHECK_DW_ERROR(dwDNN_setCUDAStream(m_cudaStream, m_dnn));

        // Get input and output dimensions
        dwDNNTensorProperties inputProps[NUM_INPUT_TENSORS];
        dwDNNTensorProperties outputProps[NUM_OUTPUT_TENSORS];

        // Allocate one device tensor per network input
        // (the model has NUM_INPUT_TENSORS = 6 inputs).
        for (uint32_t inputIdx = 0U; inputIdx < NUM_INPUT_TENSORS; ++inputIdx)
        {
            CHECK_DW_ERROR(dwDNN_getInputTensorProperties(&inputProps[inputIdx], inputIdx , m_dnn));
            printf("input tensor done\n");
            CHECK_DW_ERROR(dwDNNTensor_createNew(&m_dnnInput[inputIdx], &inputProps[inputIdx], m_sdk));
            printf("input tensor create done\n");
        }

        // Allocate outputs (NUM_OUTPUT_TENSORS = 11)
        for (uint32_t outputIdx = 0U; outputIdx < NUM_OUTPUT_TENSORS; ++outputIdx)
        {
            CHECK_DW_ERROR(dwDNN_getOutputTensorProperties(&outputProps[outputIdx], outputIdx, m_dnn));
            // Allocate device tensors
            CHECK_DW_ERROR(dwDNNTensor_createNew(&m_dnnOutputsDevice[outputIdx], &outputProps[outputIdx], m_sdk));
            // Allocate host tensors
            // NOTE(review): hostProps is only used for its tensorType below;
            // the rest of the copied properties are unused here.
            dwDNNTensorProperties hostProps = outputProps[outputIdx];
            hostProps.tensorType            = DW_DNN_TENSOR_TYPE_CPU;

            // Allocate a streamer per output to move results device -> CPU.
            CHECK_DW_ERROR(dwDNNTensorStreamer_initialize(&m_dnnOutputStreamers[outputIdx],
                                                          &outputProps[outputIdx],
                                                          hostProps.tensorType, m_sdk));
        }

        // Get coverage and bounding box blob indices
        //const char* coverageBlobName    = "coverage";
        //const char* boundingBoxBlobName = "bboxes";
        //CHECK_DW_ERROR(dwDNN_getOutputIndex(&m_cvgIdx, coverageBlobName, m_dnn));
        //CHECK_DW_ERROR(dwDNN_getOutputIndex(&m_bboxIdx, boundingBoxBlobName, m_dnn));

        // Get metadata from DNN module
        // DNN loads metadata automatically from json file stored next to the dnn model,
        // with the same name but additional .json extension if present.
        // Otherwise, the metadata will be filled with default values and the dataconditioner parameters
        // should be filled manually.
        dwDNNMetaData metadata;
        CHECK_DW_ERROR(dwDNN_getMetaData(&metadata, m_dnn));

        for (uint32_t i = 0U; i < NUM_INPUT_TENSORS; ++i){
            // Initialize data conditioner
            // NOTE(review): m_dataConditioner is a single handle but is
            // re-initialized on every iteration of this loop — the first
            // NUM_INPUT_TENSORS-1 conditioner instances leak, and only the
            // one built for the LAST input's properties survives. Either
            // keep one conditioner handle per input tensor (each input has
            // different dimensions) or initialize exactly once for the
            // camera-image input — TODO confirm intended design.
            CHECK_DW_ERROR(dwDataConditioner_initializeFromTensorProperties(&m_dataConditioner, &inputProps[i], 1U,
                                                                            &metadata.dataConditionerParams, m_cudaStream,
                                                                            m_sdk));

            // Detection region
            // NOTE(review): assumes dimensionSize[0] is width and
            // dimensionSize[1] is height — verify against the tensor layout
            // reported by dwDNN_getInputTensorProperties.
            m_detectionRegion[i].width = static_cast<uint32_t>(inputProps[i].dimensionSize[0]);
            m_detectionRegion[i].height = static_cast<uint32_t>(inputProps[i].dimensionSize[1]);
            m_detectionRegion[i].x = 0;//(m_imageWidth - m_detectionRegion.width) / 2;
            m_detectionRegion[i].y = 0;//(m_imageHeight - m_detectionRegion.height) / 2;
        }

        // Compute a pixel (cell) in output in relation to a pixel in input of the network
        // (uses only the first input/output pair).
        uint32_t gridW = outputProps[0].dimensionSize[0];
        m_cellSize     = inputProps[0].dimensionSize[0] / gridW;
    }

Process:

    dwImageCUDA* currframecuda = nullptr;
    dwImageCUDA* encoder0cuda = nullptr;
    dwImageCUDA* encoder1cuda = nullptr;
    dwImageCUDA* encoder2cuda = nullptr;
    dwImageCUDA* encoder3cuda = nullptr;
    dwImageCUDA* encoder4cuda = nullptr;

    dwImageHandle_t currentFrame[6];
    dwImageProperties currentFrameProps;
    currentFrameProps.type = DW_IMAGE_CUDA;
    currentFrameProps.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    currentFrameProps.format = DW_IMAGE_FORMAT_RGB_FLOAT32;
    currentFrameProps.height = 224;
    currentFrameProps.width = 512;

    dwImageProperties encoder0Props;
    encoder0Props.type = DW_IMAGE_CUDA;
    encoder0Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    encoder0Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
    encoder0Props.height = 112;
    encoder0Props.width = 256;

    dwImageProperties encoder1Props;
    encoder1Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    encoder1Props.type = DW_IMAGE_CUDA;
    encoder1Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
    encoder1Props.height = 56;
    encoder1Props.width = 128;

    dwImageProperties encoder2Props;
    encoder2Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    encoder2Props.type = DW_IMAGE_CUDA;
    encoder2Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
    encoder2Props.height = 28;
    encoder2Props.width = 64;

    dwImageProperties encoder3Props;
    encoder3Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    encoder3Props.type = DW_IMAGE_CUDA;
    encoder3Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
    encoder3Props.height = 14;
    encoder3Props.width = 32;

    dwImageProperties encoder4Props;
    encoder4Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
    encoder4Props.type = DW_IMAGE_CUDA;
    encoder4Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
    encoder4Props.height = 7;
    encoder4Props.width = 16;

    float* hostDataBuffer = (float*)malloc(512 * 224 * 3 * sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/current_frame.raw", (uint8_t *)hostDataBuffer, 512*224*3*4);

    float* hostDataBuffer1 = (float*)malloc(256*112*64 * sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/i_encoder_0.raw", (uint8_t *)hostDataBuffer1, 256*112*64*4);

    float* hostDataBuffer2 = (float*)malloc(128*56*64 * sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/i_encoder_1.raw", (uint8_t *)hostDataBuffer2, 128*56*64*4);

    float* hostDataBuffer3 = (float*)malloc(64*28*128 * sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/i_encoder_2.raw", (uint8_t *)hostDataBuffer3, 64*28*128*4);

    float* hostDataBuffer4 = (float*)malloc(32*14*256* sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/i_encoder_3.raw", (uint8_t *)hostDataBuffer4, 32*14*256*4);

    float* hostDataBuffer5 = (float*)malloc(16*7*512 * sizeof(float));
    loadFileToBuffer("/home/nvidia/input_files/i_encoder_4.raw", (uint8_t *)hostDataBuffer5, 16*7*512*4);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[0],currentFrameProps,m_sdk));

    CHECK_DW_ERROR(dwImage_getCUDA(&currframecuda,currentFrame[0]));
    cudaMemcpy2D(currframecuda->dptr[0], currframecuda->pitch[0], hostDataBuffer,
                 sizeof(uint8_t) * 512 * 3,
                 sizeof(uint8_t) * 512 * 3,
                 224, cudaMemcpyHostToDevice);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[1],encoder0Props,m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&encoder0cuda,currentFrame[1]));

    cudaMemcpy2D(encoder0cuda->dptr[0], encoder0cuda->pitch[0], hostDataBuffer1,
                 sizeof(uint8_t) * 256 * 64,
                 sizeof(uint8_t) * 256 * 64,
                 112, cudaMemcpyHostToDevice);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[2],encoder1Props,m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&encoder1cuda,currentFrame[2]));

    cudaMemcpy2D(encoder1cuda->dptr[0], encoder1cuda->pitch[0], hostDataBuffer2,
                 sizeof(uint8_t) * 128 * 64,
                 sizeof(uint8_t) * 128 * 64,
                 56, cudaMemcpyHostToDevice);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[3],encoder2Props,m_sdk));

    CHECK_DW_ERROR(dwImage_getCUDA(&encoder2cuda,currentFrame[3]));

    cudaMemcpy2D(encoder2cuda->dptr[0], encoder2cuda->pitch[0], hostDataBuffer3,
                 sizeof(uint8_t) * 64 * 128,
                 sizeof(uint8_t) * 64 * 128,
                 28, cudaMemcpyHostToDevice);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[4],encoder3Props,m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&encoder3cuda,currentFrame[4]));
    cudaMemcpy2D(encoder3cuda->dptr[0], encoder3cuda->pitch[0], hostDataBuffer4,
                 sizeof(uint8_t) * 32 * 256,
                 sizeof(uint8_t) * 32 * 256,
                 14, cudaMemcpyHostToDevice);

    CHECK_DW_ERROR(dwImage_create(&currentFrame[5],encoder4Props,m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&encoder4cuda,currentFrame[5]));
    cudaMemcpy2D(encoder4cuda->dptr[0], encoder4cuda->pitch[0], hostDataBuffer5,
                 sizeof(uint8_t) * 16 * 512,
                 sizeof(uint8_t) * 16 * 512,
                 7, cudaMemcpyHostToDevice);

    auto inputImgs = static_cast<dwImageHandle_t  const *>(currentFrame);
    CHECK_DW_ERROR(dwDataConditioner_prepareData(m_dnnInput[0], inputImgs, 6U, &m_detectionRegion[0],
                                                 cudaAddressModeClamp, m_dataConditioner));
    dwConstDNNTensorHandle_t inputs[1U] = {m_dnnInput[0]};
    CHECK_DW_ERROR(dwDNN_infer(m_dnnOutputsDevice, NUM_OUTPUT_TENSORS, inputs, 6U, m_dnn));

Please let me know what I am missing. Do we need 6 input tensors and 6 data conditioner handles, or is a single one enough?

Dear @mukilan.vijayakumar,
Firstly, could you confirm whether model32.bin was generated using the TensorRT_Optimization tool?

Dear Siva,

Yes, it was generated using the TensorRT_Optimization tool, and I believe the model loaded fine.

Please find attached log image, got one warning,


DNN: Missing or incompatible parameter in metadata (tonemapType). Parameter is set to default value. See dwDataConditioner for default values.

Dear @mukilan.vijayakumar,
Have you filled in the right entries for your model in the corresponding .json file? If so, could you share the .json file? If possible, please also share the complete code and the model so I can reproduce this on my host for more clarity.

Dear Siva,

Please find the json file, code and the model input and output information.
model32.bin.json (321 Bytes)
main.cpp (32.5 KB)