Sgie doesn't run on tensor from nvdspreprocess

• Hardware Platform : dGPU
• DeepStream Version: 7.0
• TensorRT Version: 8.6.1.6
• NVIDIA GPU Driver Version: 555.58.02
• Issue Type: Question

Currently my pipeline looks like this:

pgie -> nvdspreprocess -> sgie

This is my custom preprocessing code for nvdspreprocess. I have visualized the image and everything looks correct, and the CUDA copy doesn't give me any error.

NvDsPreProcessStatus
CustomTensorPreparation(CustomCtx *ctx, NvDsPreProcessBatch *batch, NvDsPreProcessCustomBuf *&buf,
                        CustomTensorParams &tensorParam, NvDsPreProcessAcquirer *acquirer)
{

  NvDsPreProcessStatus status = NVDSPREPROCESS_TENSOR_NOT_READY;
  
  /** acquire a buffer from tensor pool */
  buf = acquirer->acquire();
  // size_t buffer_offset = 0;  // Offset within the buffer
  float * pDst = (float*)buf->memory_ptr;
  int units = batch->units.size();

  for(int i = 0; i < units; i++)
  {
    printf("Preprocessing");
    //unsigned char* raw_data = (unsigned char*)batch->units[i].converted_frame_ptr;
    NvBufSurfaceParams *surf_params = batch->units[i].roi_meta.converted_buffer;


    switch (surf_params->colorFormat) {
        case NVBUF_COLOR_FORMAT_RGBA:
            std::cout << "Color Format: RGBA" << std::endl;
            break;
        case NVBUF_COLOR_FORMAT_RGB:
            std::cout << "Color Format: RGB" << std::endl;
            break;
        // Add cases for other color formats as needed
        default:
            std::cout << "Color Format: Unknown" << std::endl;
            break;
    }
    // dim3 blockDim(16, 16);
    // dim3 gridDim((width + blockDim.x - 1) / blockDim.x,
    //              (height + blockDim.y - 1) / blockDim.y);
    // NvDsPreProcessNormalizeInPlaceKernel<<<gridDim, blockDim>>>(
    //     surf_params->dataPtr, width, height, pitch);

    NvDsRoiMeta roi_meta = batch->units[i].roi_meta;
    NvDsObjectMeta *obj_meta = (NvDsObjectMeta *)batch->units[i].roi_meta.object_meta;
    printf("Object ID: %d\n", obj_meta->unique_component_id);
    float x_scale = roi_meta.scale_ratio_x;
    float y_scale = roi_meta.scale_ratio_y;
    float left = obj_meta->rect_params.left;
    float top = obj_meta->rect_params.top;
    int width = obj_meta->rect_params.width;
    int height = obj_meta->rect_params.height;
    printf("retrieved array: %ld\n", obj_meta->misc_obj_info[0]);
    printf("left: %f\n", left);
    printf("top: %f\n", top);
    printf("width: %d\n", width);
    printf("height: %d\n", height);
    printf("Landmark height: %u\n", obj_meta->mask_params.height);
    printf("Landmark width: %u\n", obj_meta->mask_params.width);
    printf("Landmark size: %u\n", obj_meta->mask_params.size);

    if (obj_meta->mask_params.data != nullptr && obj_meta->mask_params.size > 0) {
      float *mask_data = obj_meta->mask_params.data;
      unsigned int mask_size = obj_meta->mask_params.size / sizeof(float); // number of float elements
      for (unsigned int j = 0; j < mask_size; ++j) {
        printf("Landmark value: %f\n", mask_data[j]);
      }

      // Scale the landmark coordinates to the converted frame.
      cv::Point2f leftEye(mask_data[0] * x_scale, mask_data[1] * y_scale);
      cv::Point2f rightEye(mask_data[2] * x_scale, mask_data[3] * y_scale);

      // Wrap the converted RGBA surface, deep-copy it, then convert to BGR and align.
      cv::Mat frame(surf_params->height, surf_params->width, CV_8UC4, surf_params->dataPtr, surf_params->pitch);
      cv::Mat image_data(surf_params->height, surf_params->width, CV_8UC4);
      frame.copyTo(image_data);
      cv::Mat output_image(surf_params->height, surf_params->width, CV_8UC3);
      cv::cvtColor(image_data, output_image, cv::COLOR_RGBA2BGR);
      cv::Mat alignedFace = alignFace(output_image, leftEye, rightEye);
      cv::imwrite("output.jpg", alignedFace);
      cv::Mat floatAlignedFace;
      alignedFace.convertTo(floatAlignedFace, CV_32FC3, 1.0 / 255.0);

      // pDst is a float*, so pointer arithmetic is already in units of float;
      // multiplying the offset by sizeof(float) again would overrun the tensor buffer.
      float *outPtr = pDst + (size_t)i * 3 * 160 * 160;
      size_t image_size = 3 * 160 * 160 * sizeof(float);

      printf("Start copying\n");
      printf("outPtr: %p\n", (void *)outPtr);
      printf("image_size: %zu bytes\n", image_size);
      printf("floatAlignedFace.data: %p\n", (void *)floatAlignedFace.data);

      // Copy this unit's prepared face tensor into the device tensor buffer.
      cudaError_t err = cudaMemcpy(outPtr, floatAlignedFace.data, image_size, cudaMemcpyHostToDevice);
      if (err != cudaSuccess) {
        printf("CUDA memcpy error: %s\n", cudaGetErrorString(err));
      } else {
        printf("CUDA memcpy successful.\n");
      }
    }
  }

  /* The tensor's batch dimension must match the number of units in this batch. */
  tensorParam.params.network_input_shape[0] = (int)batch->units.size();
  status = NVDSPREPROCESS_SUCCESS;
  return status;
}
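On the error path I currently just print the error and still return NVDSPREPROCESS_SUCCESS. Below is a sketch of what I'm considering instead; it assumes the acquirer exposes a release() that hands the buffer back to the tensor pool and that NVDSPREPROCESS_CUSTOM_TENSOR_FAILED is the right status to return, which I haven't verified against nvdspreprocess_interface.h:

      cudaError_t err = cudaMemcpy(outPtr, floatAlignedFace.data, image_size,
                                   cudaMemcpyHostToDevice);
      if (err != cudaSuccess) {
        printf("CUDA memcpy error: %s\n", cudaGetErrorString(err));
        // Assumed API: return the buffer to the pool and report failure so
        // nvinfer doesn't consume a partially filled tensor.
        acquirer->release(buf);
        return NVDSPREPROCESS_CUSTOM_TENSOR_FAILED;
      }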

Here is my config:
face_embedding_preprocess.txt (2.7 KB)

face_embedding_config.txt (822 Bytes)

I have also set input-tensor-meta to 1 on the sgie:

sgie_face_embedding = make_element("nvinfer", "face_embedding")
sgie_face_embedding.set_property('config-file-path', "Face/face_embedding_config.txt")
sgie_face_embedding.set_property("input-tensor-meta", 1)
sgie_face_embedding.set_property("output-tensor-meta", 1)

There is no prediction from the sgie (no tensor attached to the user meta on the sgie's src pad). Note that my pipeline still works without nvdspreprocess. I wonder if there is anything wrong with my config or my nvdspreprocess code.

Are you sure the model you are using can detect the results properly? You can also add some logs in gstnvinfer.cpp to check whether the sgie is running in input_tensor_from_meta mode:

  if (nvinfer->input_tensor_from_meta) {
    flow_ret = gst_nvinfer_process_tensor_input (nvinfer, inbuf, in_surf);
  } else if (nvinfer->process_full_frame) {
    flow_ret = gst_nvinfer_process_full_frame (nvinfer, inbuf, in_surf);
  } else {
    flow_ret = gst_nvinfer_process_objects (nvinfer, inbuf, in_surf);
  }
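For example, just as a sketch (the exact lines in gst_nvinfer_submit_input_buffer may differ between DeepStream versions), a print in that branch will confirm which path is taken:

  if (nvinfer->input_tensor_from_meta) {
    /* debug print to confirm the tensor-from-meta path is taken */
    g_print ("nvinfer(%s): using preprocessed tensor from meta\n",
        GST_ELEMENT_NAME (nvinfer));
    flow_ret = gst_nvinfer_process_tensor_input (nvinfer, inbuf, in_surf);
  }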

Well, I found out that my config was wrong; setting target-unique-ids=2 works. However, it only preprocesses about 5-6 objects, even though I have set scaling-buf-pool-size=100 and tensor-buf-pool-size=100.
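For reference, the relevant part of my nvdspreprocess [property] group now looks roughly like this (other keys omitted):

[property]
# unique-id of the gie whose output objects should be preprocessed
target-unique-ids=2
# pool sizes I increased while debugging
scaling-buf-pool-size=100
tensor-buf-pool-size=100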

I have hundreds of objects in the frame, but batch->units.size() is capped at 6-7, so the plugin only preprocesses 6-7 objects. I tried increasing scaling-buf-pool-size and tensor-buf-pool-size, but it didn't help. For example, only one source here is preprocessed (the face with the red bounding box).

I tried this config, and it turns out each group only preprocesses one source, even if I set src-ids=-1. Is there any config that makes this work on all objects across all sources?

[group-0]
src-ids=1
custom-input-transformation-function=CustomAsyncTransformation
process-on-all-objects=1
process-on-roi=0
roi-params-src-0=0;540;900;500;960;0;900;500;0;0;540;900;
draw-roi=0
[group-1]
src-ids=0
custom-input-transformation-function=CustomAsyncTransformation
process-on-all-objects=1
process-on-roi=0
roi-params-src-0=0;540;900;500;960;0;900;500;0;0;540;900;
draw-roi=0

Setting src-ids to -1 is not supported. Please refer to the Gst-nvdspreprocess Group Supported Keys section of our guide. You can set it to 0;1;2....
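For example, a single group that covers two sources and processes all of their objects could look roughly like this (values are illustrative):

[group-0]
src-ids=0;1
process-on-all-objects=1
process-on-roi=0
custom-input-transformation-function=CustomAsyncTransformation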

Thanks, I ended up setting it from 1 to 40.
