Hi,
I am trying to run and validate our model using the DriveWorks DNN module. I have already validated the same model with TensorRT directly and it works fine.
With DriveWorks I am facing a few issues for which I need your support. Our model has 6 inputs and 11 outputs; for now I am feeding file-based input to validate the outputs.
My doubt is about how to pass the 6 inputs to the model. Whenever I try, dwDataConditioner_prepareData fails with a channel descriptor mismatch:
Driveworks exception thrown: CudaTextureObjectCache::getOrCreateTextureObject. cudaErrorInvalidChannelDescriptor: invalid channel descriptor
or
Driveworks exception thrown: DW_INVALID_ARGUMENT: DataConditioner: Given tensor’s properties do not match the expected properties.
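To debug this, I also dump the tensor properties that the network expects for each input so I can compare them with the images I create below. This is just a rough helper I added (numDimensions/dimensionSize/dataType are the field names as they appear in my DriveWorks headers; the exact members may differ in other versions):
void printInputProps(dwDNNHandle_t dnn, uint32_t numInputs)
{
    for (uint32_t i = 0U; i < numInputs; ++i)
    {
        dwDNNTensorProperties props{};
        CHECK_DW_ERROR(dwDNN_getInputTensorProperties(&props, i, dnn));
        // Print the dimensions and data type the network expects for input i
        printf("input %u: numDimensions=%u dims=[", i, props.numDimensions);
        for (uint32_t d = 0U; d < props.numDimensions; ++d)
            printf("%u ", static_cast<uint32_t>(props.dimensionSize[d]));
        printf("] dataType=%d\n", static_cast<int>(props.dataType));
    }
}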
Initialize DNN:
{
// If not specified, load the correct network based on platform
std::string tensorRTModel = getArgument("tensorRT_model");
if (tensorRTModel.empty())
{
tensorRTModel = dw_samples::DataPath::get() + "/samples/detector/";
tensorRTModel += getPlatformPrefix();
tensorRTModel += "/model32.bin";
}
// Initialize DNN from a TensorRT file
CHECK_DW_ERROR(dwDNN_initializeTensorRTFromFile(&m_dnn, tensorRTModel.c_str(), nullptr,
DW_PROCESSOR_TYPE_GPU, m_sdk));
CHECK_DW_ERROR(dwDNN_setCUDAStream(m_cudaStream, m_dnn));
// Get input and output dimensions
dwDNNTensorProperties inputProps[NUM_INPUT_TENSORS];
dwDNNTensorProperties outputProps[NUM_OUTPUT_TENSORS];
// Allocate input tensor
for (uint32_t inputIdx = 0U; inputIdx < NUM_INPUT_TENSORS; ++inputIdx)
{
CHECK_DW_ERROR(dwDNN_getInputTensorProperties(&inputProps[inputIdx], inputIdx, m_dnn));
printf("input tensor done\n");
CHECK_DW_ERROR(dwDNNTensor_createNew(&m_dnnInput[inputIdx], &inputProps[inputIdx], m_sdk));
printf("input tensor create done\n");
}
// Allocate outputs
for (uint32_t outputIdx = 0U; outputIdx < NUM_OUTPUT_TENSORS; ++outputIdx)
{
CHECK_DW_ERROR(dwDNN_getOutputTensorProperties(&outputProps[outputIdx], outputIdx, m_dnn));
// Allocate device tensors
CHECK_DW_ERROR(dwDNNTensor_createNew(&m_dnnOutputsDevice[outputIdx], &outputProps[outputIdx], m_sdk));
// Allocate host tensors
dwDNNTensorProperties hostProps = outputProps[outputIdx];
hostProps.tensorType = DW_DNN_TENSOR_TYPE_CPU;
// Allocate streamer
CHECK_DW_ERROR(dwDNNTensorStreamer_initialize(&m_dnnOutputStreamers[outputIdx],
&outputProps[outputIdx],
hostProps.tensorType, m_sdk));
}
// Get coverage and bounding box blob indices
//const char* coverageBlobName = "coverage";
//const char* boundingBoxBlobName = "bboxes";
//CHECK_DW_ERROR(dwDNN_getOutputIndex(&m_cvgIdx, coverageBlobName, m_dnn));
//CHECK_DW_ERROR(dwDNN_getOutputIndex(&m_bboxIdx, boundingBoxBlobName, m_dnn));
// Get metadata from DNN module
// DNN loads metadata automatically from json file stored next to the dnn model,
// with the same name but additional .json extension if present.
// Otherwise, the metadata will be filled with default values and the dataconditioner parameters
// should be filled manually.
dwDNNMetaData metadata;
CHECK_DW_ERROR(dwDNN_getMetaData(&metadata, m_dnn));
for (uint32_t i = 0U; i < NUM_INPUT_TENSORS; ++i){
// Initialize data conditioner
CHECK_DW_ERROR(dwDataConditioner_initializeFromTensorProperties(&m_dataConditioner, &inputProps[i], 1U,
&metadata.dataConditionerParams, m_cudaStream,
m_sdk));
// Detection region
m_detectionRegion[i].width = static_cast<uint32_t>(inputProps[i].dimensionSize[0]);
m_detectionRegion[i].height = static_cast<uint32_t>(inputProps[i].dimensionSize[1]);
m_detectionRegion[i].x = 0;//(m_imageWidth - m_detectionRegion.width) / 2;
m_detectionRegion[i].y = 0;//(m_imageHeight - m_detectionRegion.height) / 2;
}
// Compute a pixel (cell) in output in relation to a pixel in input of the network
uint32_t gridW = outputProps[0].dimensionSize[0];
m_cellSize = inputProps[0].dimensionSize[0] / gridW;
}
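In case the .json metadata next to the model is not picked up, I was also planning to fill the data conditioner parameters manually before initializing the conditioner, roughly like below. meanValue and scaleCoefficient are the field names as I understand them from dwDataConditionerParams in my version, and the zero-mean/unit-scale values are just my assumption since the raw files are already pre-processed floats; please correct me if this is not the right way:
dwDNNMetaData metadata{};
CHECK_DW_ERROR(dwDNN_getMetaData(&metadata, m_dnn));
// Assumption: no mean subtraction and no extra scaling, because the
// file-based inputs are already normalized float data.
for (uint32_t plane = 0U; plane < 3U; ++plane)
    metadata.dataConditionerParams.meanValue[plane] = 0.0f;
metadata.dataConditionerParams.scaleCoefficient = 1.0f;
CHECK_DW_ERROR(dwDataConditioner_initializeFromTensorProperties(&m_dataConditioner, &inputProps[0], 1U,
                                                                &metadata.dataConditionerParams, m_cudaStream,
                                                                m_sdk));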
Process:
dwImageCUDA* currframecuda = nullptr;
dwImageCUDA* encoder0cuda = nullptr;
dwImageCUDA* encoder1cuda = nullptr;
dwImageCUDA* encoder2cuda = nullptr;
dwImageCUDA* encoder3cuda = nullptr;
dwImageCUDA* encoder4cuda = nullptr;
dwImageHandle_t currentFrame[6];
dwImageProperties currentFrameProps;
currentFrameProps.type = DW_IMAGE_CUDA;
currentFrameProps.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
currentFrameProps.format = DW_IMAGE_FORMAT_RGB_FLOAT32;
currentFrameProps.height = 224;
currentFrameProps.width = 512;
dwImageProperties encoder0Props;
encoder0Props.type = DW_IMAGE_CUDA;
encoder0Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
encoder0Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
encoder0Props.height = 112;
encoder0Props.width = 256;
dwImageProperties encoder1Props;
encoder1Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
encoder1Props.type = DW_IMAGE_CUDA;
encoder1Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
encoder1Props.height = 56;
encoder1Props.width = 128;
dwImageProperties encoder2Props;
encoder2Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
encoder2Props.type = DW_IMAGE_CUDA;
encoder2Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
encoder2Props.height = 28;
encoder2Props.width = 64;
dwImageProperties encoder3Props;
encoder3Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
encoder3Props.type = DW_IMAGE_CUDA;
encoder3Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
encoder3Props.height = 14;
encoder3Props.width = 32;
dwImageProperties encoder4Props;
encoder4Props.memoryLayout = DW_IMAGE_MEMORY_TYPE_PITCH;
encoder4Props.type = DW_IMAGE_CUDA;
encoder4Props.format = DW_IMAGE_FORMAT_R_FLOAT32;
encoder4Props.height = 7;
encoder4Props.width = 16;
float* hostDataBuffer = (float*)malloc(512 * 224 * 3 * sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/current_frame.raw", (uint8_t *)hostDataBuffer, 512*224*3*4);
float* hostDataBuffer1 = (float*)malloc(256*112*64 * sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/i_encoder_0.raw", (uint8_t *)hostDataBuffer1, 256*112*64*4);
float* hostDataBuffer2 = (float*)malloc(128*56*64 * sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/i_encoder_1.raw", (uint8_t *)hostDataBuffer2, 128*56*64*4);
float* hostDataBuffer3 = (float*)malloc(64*28*128 * sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/i_encoder_2.raw", (uint8_t *)hostDataBuffer3, 64*28*128*4);
float* hostDataBuffer4 = (float*)malloc(32*14*256* sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/i_encoder_3.raw", (uint8_t *)hostDataBuffer4, 32*14*256*4);
float* hostDataBuffer5 = (float*)malloc(16*7*512 * sizeof(float));
loadFileToBuffer("/home/nvidia/input_files/i_encoder_4.raw", (uint8_t *)hostDataBuffer5, 16*7*512*4);
CHECK_DW_ERROR(dwImage_create(&currentFrame[0], currentFrameProps, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&currframecuda,currentFrame[0]));
cudaMemcpy2D(currframecuda->dptr[0], currframecuda->pitch[0], hostDataBuffer,
sizeof(uint8_t) * 512 * 3,
sizeof(uint8_t) * 512 * 3,
224, cudaMemcpyHostToDevice);
CHECK_DW_ERROR(dwImage_create(&currentFrame[1], encoder0Props, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&encoder0cuda,currentFrame[1]));
cudaMemcpy2D(encoder0cuda->dptr[0], encoder0cuda->pitch[0], hostDataBuffer1,
sizeof(uint8_t) * 256 * 64,
sizeof(uint8_t) * 256 * 64,
112, cudaMemcpyHostToDevice);
CHECK_DW_ERROR(dwImage_create(&currentFrame[2], encoder1Props, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&encoder1cuda,currentFrame[2]));
cudaMemcpy2D(encoder1cuda->dptr[0], encoder1cuda->pitch[0], hostDataBuffer2,
sizeof(uint8_t) * 128 * 64,
sizeof(uint8_t) * 128 * 64,
56, cudaMemcpyHostToDevice);
CHECK_DW_ERROR(dwImage_create(&currentFrame[3], encoder2Props, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&encoder2cuda,currentFrame[3]));
cudaMemcpy2D(encoder2cuda->dptr[0], encoder2cuda->pitch[0], hostDataBuffer3,
sizeof(uint8_t) * 64 * 128,
sizeof(uint8_t) * 64 * 128,
28, cudaMemcpyHostToDevice);
CHECK_DW_ERROR(dwImage_create(&currentFrame[4], encoder3Props, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&encoder3cuda,currentFrame[4]));
cudaMemcpy2D(encoder3cuda->dptr[0], encoder3cuda->pitch[0], hostDataBuffer4,
sizeof(uint8_t) * 32 * 256,
sizeof(uint8_t) * 32 * 256,
14, cudaMemcpyHostToDevice);
CHECK_DW_ERROR(dwImage_create(&currentFrame[5], encoder4Props, m_sdk));
CHECK_DW_ERROR(dwImage_getCUDA(&encoder4cuda,currentFrame[5]));
cudaMemcpy2D(encoder4cuda->dptr[0], encoder4cuda->pitch[0], hostDataBuffer5,
sizeof(uint8_t) * 16 * 512,
sizeof(uint8_t) * 16 * 512,
7, cudaMemcpyHostToDevice);
auto inputImgs = static_cast<dwImageHandle_t const *>(currentFrame);
CHECK_DW_ERROR(dwDataConditioner_prepareData(m_dnnInput[0], inputImgs, 6U, &m_detectionRegion[0],
cudaAddressModeClamp, m_dataConditioner));
dwConstDNNTensorHandle_t inputs[1U] = {m_dnnInput[0]};
CHECK_DW_ERROR(dwDNN_infer(m_dnnOutputsDevice, NUM_OUTPUT_TENSORS, inputs, 6U, m_dnn));
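For reference, loadFileToBuffer is just a small helper that reads a raw file into the host buffer, roughly:
bool loadFileToBuffer(const char* path, uint8_t* buffer, size_t sizeBytes)
{
    FILE* f = fopen(path, "rb");
    if (!f)
        return false;
    // Read exactly sizeBytes from the raw dump into the caller's buffer
    size_t bytesRead = fread(buffer, 1, sizeBytes, f);
    fclose(f);
    return bytesRead == sizeBytes;
}
After dwDNN_infer I intend to stream each output tensor back to the host with the streamers created above, following the pattern from the object detector sample. This part is not reached yet because prepareData fails, but I am including it for completeness; the streamer calls are how I read them from the sample, so please correct me if they have changed:
for (uint32_t outputIdx = 0U; outputIdx < NUM_OUTPUT_TENSORS; ++outputIdx)
{
    dwDNNTensorStreamerHandle_t streamer = m_dnnOutputStreamers[outputIdx];
    // Send the device tensor to the streamer and receive the CPU copy
    CHECK_DW_ERROR(dwDNNTensorStreamer_producerSend(m_dnnOutputsDevice[outputIdx], streamer));
    dwDNNTensorHandle_t outputHost = DW_NULL_HANDLE;
    CHECK_DW_ERROR(dwDNNTensorStreamer_consumerReceive(&outputHost, 1000, streamer));
    void* data = nullptr;
    CHECK_DW_ERROR(dwDNNTensor_lock(&data, outputHost));
    // ... validate the float results here ...
    CHECK_DW_ERROR(dwDNNTensor_unlock(outputHost));
    CHECK_DW_ERROR(dwDNNTensorStreamer_consumerReturn(&outputHost, streamer));
    CHECK_DW_ERROR(dwDNNTensorStreamer_producerReturn(nullptr, 1000, streamer));
}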
Please let me know what I am missing. Do we need 6 input tensors and 6 data conditioner handles, or is a single one enough?