• Hardware Platform: NVIDIA Tesla T4
• DeepStream Version 5.1
• TensorRT Version 7.2.2.3
• NVIDIA GPU Driver Version (valid for GPU only) 460.32.03
• Issue Type( questions, new requirements, bugs) question
Hi, I am trying to port the SCRFD face detector code to DeepStream.
However, the custom parser plugin code requires a cuda_stream.
Code snippet:
const int MAX_IMAGE_BBOX = 1024;
const int NUM_BOX_ELEMENT = 16; // left, top, right, bottom, confidence, keepflag(1keep,0ignore), landmark(x, y) * 5|
TRT::Tensor affin_matrix_device(TRT::DataType::Float);|
TRT::Tensor output_array_device(TRT::DataType::Float);|
TRT::Tensor prior(TRT::DataType::Float);|
float confidence_threshold_=0.5;|
float nms_threshold_=0.5;|
int max_batch_size = 3;|
output_array_device.to_gpu(false);|
affin_matrix_device.resize(max_batch_size, 8).to_gpu();
output_array_device.resize(max_batch_size, 1 + MAX_IMAGE_BBOX * NUM_BOX_ELEMENT).to_gpu();
NvDsInferLayerInfo layer_out = layerFinder(“465”);
"for(int ibatch = 0; ibatch < 1; ++ibatch){
//auto& job = fetch_jobs[ibatch];
float image_based_output = (float )layer_out->data;
float output_array_ptr = output_array_device.gpu(ibatch);
std::cout<<"output_array_ptr “<<output_array_ptr<<std::endl;
auto affine_matrix = affin_matrix_device.gpu(ibatch);
checkCudaRuntime(cudaMemsetAsync(output_array_ptr, 0, sizeof(int), cuda_stream));
Scrfd::decode_kernel_invoker(
image_based_output,
16800, confidence_threshold_, nms_threshold_, affine_matrix,
output_array_ptr, MAX_IMAGE_BBOX, prior.gpu(),
cuda_stream);
}”
So how can I get the cuda_stream here from upstream?
Link that I am following for reference:
Thanks.