Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU)
• DeepStream Version
• JetPack Version (valid for Jetson only)
• TensorRT Version
• NVIDIA GPU Driver Version (valid for GPU only)
• Issue Type( questions, new requirements, bugs)
• How to reproduce the issue ? (This is for bugs. Including which sample app is using, the configuration files content, the command line used and other details for reproducing)
• Requirement details( This is for new requirement. Including the module name-for which plugin or for which sample application, the function description)
why no parallel in this code?
int image_size = 3 * image_h * image_w;
float* rgb_buf = nullptr;
cudaMalloc(&rgb_buf, image_size * 6 * sizeof(float));
float scale = 1.0f;
omp_set_num_threads(6); //设置线程的个数
std::vector<cv::cuda::Stream> vecStream = {cv::cuda::Stream(), cv::cuda::Stream(), cv::cuda::Stream(), cv::cuda::Stream(), cv::cuda::Stream(), cv::cuda::Stream()};
std::vector<cv::Mat> image_mat;
pushbach 6 mat
{
#pragma omp parallel for schedule(static, 1) shared(vecStream)
for (size_t i = 0; i < 6; ++i)
{
cv::cuda::GpuMat gpuOutImage;
gpuOutImage.upload(image_mat[i], vecStream[i]);
cv::cuda::resize(gpuOutImage, gpuOutImage, cv::Size(image_h, image_w), 0, 0, cv::INTER_LINEAR, vecStream[i]);
cv::cuda::GpuMat flt_image(image_h, image_w, CV_32FC3, rgb_buf + image_size * i);
gpuOutImage.convertTo(flt_image, CV_32FC3, scale, vecStream[i]);
cv::cuda::subtract(flt_image, cv::Scalar(103.530, 116.280, 123.675), flt_image,cv::noArray(), -1, vecStream[i]);
cv::cuda::divide(flt_image, cv::Scalar(57.375, 57.120, 58.395), flt_image, 1, -1, vecStream[i]);
}
}
nsys data is