DepthAnythingV2

Jetson Orin Nano Super
Latest JetPack and DeepStream, fresh install.

Hello,
I just got my Jetson a few days ago and have been playing around with DeepStream. I've managed to run a few of the test apps. For my project I would like to run DepthAnythingV2 in my DeepStream pipeline, but I don't really understand where to start.

Could I get a few pointers in the right direction?
Thanks!

The latest DeepStream 7.1 is meant to run on JetPack 6.1. If you flash JetPack 6.2, you may run into some problems.

The steps are usually as follows:

  1. Export the DepthAnythingV2 model as ONNX.
  2. Build a DeepStream pipeline, usually like
source --> nvstreammux --> nvinfer --> sink

You can refer to the deepstream-test1 sample to build the pipeline (a minimal sketch follows below).

  3. DeepStream currently cannot draw depth directly onto the image via nvdsosd, so set output-tensor-meta to 1 in the configuration file; the model output is then attached to the frame's user meta as type NVDSINFER_TENSOR_OUTPUT_META.

Then add a probe function to the src pad of nvinfer to read and process NVDSINFER_TENSOR_OUTPUT_META. For more information, please refer to /opt/nvidia/deepstream/deepstream/sources/apps/sample_apps/deepstream-infer-tensor-meta-test/deepstream_infer_tensor_meta_test.cpp.
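
If it helps to see the shape of step 2 concretely, here is a minimal sketch (not the full sample) that builds the same source --> nvstreammux --> nvinfer --> sink chain with gst_parse_launch. The input file and the config file name (dsdepth_pgie_config.txt) are placeholders, and fakesink stands in for the sink since nvdsosd cannot render the depth output anyway:

#include <gst/gst.h>

int main(int argc, char *argv[]) {
  gst_init(&argc, &argv);

  /* source --> nvstreammux --> nvinfer --> sink, built from a launch string.
   * File names are placeholders; replace them with your own. */
  GError *error = NULL;
  GstElement *pipeline = gst_parse_launch(
      "filesrc location=sample_720p.h264 ! h264parse ! nvv4l2decoder ! "
      "m.sink_0 nvstreammux name=m batch-size=1 width=1920 height=1080 ! "
      "nvinfer config-file-path=dsdepth_pgie_config.txt ! fakesink",
      &error);
  if (!pipeline) {
    g_printerr("Failed to create pipeline: %s\n", error->message);
    g_clear_error(&error);
    return -1;
  }

  gst_element_set_state(pipeline, GST_STATE_PLAYING);

  /* Block until EOS or an error is posted on the bus. */
  GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
  GstMessage *msg = gst_bus_timed_pop_filtered(
      bus, GST_CLOCK_TIME_NONE,
      (GstMessageType)(GST_MESSAGE_EOS | GST_MESSAGE_ERROR));
  if (msg)
    gst_message_unref(msg);
  gst_object_unref(bus);

  gst_element_set_state(pipeline, GST_STATE_NULL);
  gst_object_unref(pipeline);
  return 0;
}

The probe from step 3 is then attached to the src pad of the nvinfer element, as in the full example later in this thread.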

Hi,
thank you for the reply. I managed to export it as ONNX and convert it to a TensorRT engine. I think I understand how to change the pipeline, but could you expand further on the third step? I'm not quite sure what to do there in practice.

Add output-tensor-meta=1 to the nvinfer configuration file:

[property]
gpu-id=0
net-scale-factor=0.00392156862745098
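# (0.00392156862745098 = 1/255, so 8-bit pixel values are scaled into the 0-1 range)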
onnx-file=depth_anything_v2_vits_dynamic.onnx
model-engine-file=depth_anything_v2_vits_dynamic.onnx_b1_gpu0_fp32.engine
batch-size=1
infer-dims=3;518;518
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=0
interval=0
gie-unique-id=1
#scaling-filter=0
#scaling-compute-hw=0
## 0=Detector, 1=Classifier, 2=Segmentation, 100=Other
network-type=100
# Enable tensor metadata output
# [batch, height, width]
output-tensor-meta=1
process-mode=1
model-color-format=0

Take the tensor data from NvDsInferTensorMeta, then save it as an image or composite it onto the video frame as needed:

#include <glib.h>
#include <gst/gst.h>
#include <opencv2/opencv.hpp>
#include <cstdio>

#include "cuda_runtime_api.h"
#include "gstnvdsinfer.h"
#include "gstnvdsmeta.h"

/* The muxer output resolution must be set if the input streams will be of
 * different resolution. The muxer will scale all the input frames to this
 * resolution. */
#define MUXER_OUTPUT_WIDTH 1920
#define MUXER_OUTPUT_HEIGHT 1080

/* Muxer batch formation timeout, for e.g. 40 millisec. Should ideally be set
 * based on the fastest source's framerate. */
#define MUXER_BATCH_TIMEOUT_USEC 40000

gint frame_number = 0;

static GstPadProbeReturn
pgie_src_pad_buffer_probe(GstPad *pad, GstPadProbeInfo *info, gpointer u_data) {
  GstBuffer *buf = (GstBuffer *)info->data;
  NvDsMetaList *l_frame = NULL;
  NvDsBatchMeta *batch_meta = gst_buffer_get_nvds_batch_meta(buf);

  for (l_frame = batch_meta->frame_meta_list; l_frame != NULL;
       l_frame = l_frame->next) {
    NvDsFrameMeta *frame_meta = (NvDsFrameMeta *)(l_frame->data);

    /* Iterate user metadata in frames to search PGIE's tensor metadata */
    for (NvDsMetaList *l_user = frame_meta->frame_user_meta_list;
         l_user != NULL; l_user = l_user->next) {
      NvDsUserMeta *user_meta = (NvDsUserMeta *)l_user->data;
      if (user_meta->base_meta.meta_type != NVDSINFER_TENSOR_OUTPUT_META)
        continue;

      /* convert to tensor metadata */
      NvDsInferTensorMeta *meta =
          (NvDsInferTensorMeta *)user_meta->user_meta_data;
      for (unsigned int i = 0; i < meta->num_output_layers; i++) {
        NvDsInferLayerInfo *info = &meta->output_layers_info[i];
        /* out_buf_ptrs_host holds the host-side copy of this layer's output */
        info->buffer = meta->out_buf_ptrs_host[i];
        // printf("layer name %s dtype %d dims", info->layerName, info->dataType);
        // for (int i = 0; i < info->inferDims.numDims; i++) {
        //   printf(" %d", info->inferDims.d[i]);
        // }
        // printf("\n");

        /* Assumes the exported model emits a single [H, W] FP32 depth map,
         * so inferDims.d[0] is the height and d[1] the width; adjust the
         * indices if your ONNX output keeps a leading batch/channel dim. */
        auto input_h = info->inferDims.d[0];
        auto input_w = info->inferDims.d[1];
        auto depth_data = info->buffer;
        auto img_w = frame_meta->pipeline_width;
        auto img_h = frame_meta->pipeline_height;
        // printf("img_w %d img_h %d \n", img_w, img_h);
        // Convert the entire depth_data vector to a CV_32FC1 Mat
        cv::Mat depth_mat(input_h, input_w, CV_32FC1, depth_data);
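        /* Per-frame min-max normalization: the resulting colors show relative
         * depth within each frame, not absolute/metric depth. */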
        cv::normalize(depth_mat, depth_mat, 0, 255, cv::NORM_MINMAX, CV_8U);

        // Create a colormap from the depth data
        cv::Mat colormap;
        cv::applyColorMap(depth_mat, colormap, cv::COLORMAP_INFERNO);

        // Rescale the colormap to the full frame size. limX/limY below would
        // give an aspect-preserving size but are not used by the plain resize.
        int limX, limY;
        if (img_w > img_h) {
          limX = input_w;
          limY = input_w * img_h / img_w;
        } else {
          limX = input_w * img_w / img_h;
          limY = input_w;
        }
        cv::resize(colormap, colormap, cv::Size(img_w, img_h));

        if (frame_number % 10 == 0) {
          char filename[64] = { 0 };
          std::snprintf(filename, sizeof(filename), "out%d.png", frame_number);
          cv::imwrite(filename, colormap);
        }
      }
    }
  }
  g_print("Frame Number = %d \n", frame_number);
  frame_number++;
  return GST_PAD_PROBE_OK;
}

static gboolean bus_call(GstBus *bus, GstMessage *msg, gpointer data) {
  GMainLoop *loop = (GMainLoop *)data;
  switch (GST_MESSAGE_TYPE(msg)) {
  case GST_MESSAGE_EOS:
    g_print("End of stream\n");
    g_main_loop_quit(loop);
    break;
  case GST_MESSAGE_ERROR: {
    gchar *debug = NULL;
    GError *error = NULL;
    gst_message_parse_error(msg, &error, &debug);
    g_printerr("ERROR from element %s: %s\n", GST_OBJECT_NAME(msg->src),
               error->message);
    if (debug)
      g_printerr("Error details: %s\n", debug);
    g_free(debug);
    g_error_free(error);
    g_main_loop_quit(loop);
    break;
  }
  default:
    break;
  }
  return TRUE;
}

int main(int argc, char *argv[]) {
  GMainLoop *loop = NULL;
  GstElement *pipeline = NULL, *source = NULL, *h264parser = NULL,
             *decoder = NULL, *streammux = NULL, *sink = NULL, *pgie = NULL;

  GstBus *bus = NULL;
  guint bus_watch_id;
  GstPad *pgie_src_pad = NULL;

  int current_device = -1;
  cudaGetDevice(&current_device);
  struct cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, current_device);
  /* Check input arguments */
  if (argc != 2) {
    g_printerr("%s <H264 filename>\n", argv[0]);
    return -1;
  }

  /* Standard GStreamer initialization */
  gst_init(&argc, &argv);
  loop = g_main_loop_new(NULL, FALSE);

  /* Create gstreamer elements */
  /* Create Pipeline element that will form a connection of other elements */
  pipeline = gst_pipeline_new("dsdepth-pipeline");

  /* Source element for reading from the file */
  source = gst_element_factory_make("filesrc", "file-source");

  /* Since the data format in the input file is elementary h264 stream,
   * we need a h264parser */
  h264parser = gst_element_factory_make("h264parse", "h264-parser");

  /* Use nvdec_h264 for hardware accelerated decode on GPU */
  decoder = gst_element_factory_make("nvv4l2decoder", "nvv4l2-decoder");

  /* Create nvstreammux instance to form batches from one or more sources. */
  streammux = gst_element_factory_make("nvstreammux", "stream-muxer");

  if (!pipeline || !streammux) {
    g_printerr("One element could not be created. Exiting.\n");
    return -1;
  }

  pgie = gst_element_factory_make("nvinfer", "primary-nvinference-engine");

  /* Finally render the osd output */
  if (prop.integrated) {
    sink = gst_element_factory_make("nv3dsink", "nv3d-sink");
  } else {
#ifdef __aarch64__
    sink = gst_element_factory_make("nv3dsink", "nvvideo-renderer");
#else
    sink = gst_element_factory_make("nveglglessink", "nvvideo-renderer");
#endif
  }

  if (!source || !h264parser || !decoder || !pgie || !sink) {
    g_printerr("One element could not be created. Exiting.\n");
    return -1;
  }

  /* we set the input filename to the source element */
  g_object_set(G_OBJECT(source), "location", argv[1], NULL);

  if (g_str_has_suffix(argv[1], ".h264")) {
    g_object_set(G_OBJECT(source), "location", argv[1], NULL);

    g_object_set(G_OBJECT(streammux), "batch-size", 1, NULL);

    g_object_set(G_OBJECT(streammux), "width", MUXER_OUTPUT_WIDTH, "height",
                 MUXER_OUTPUT_HEIGHT, "batched-push-timeout",
                 MUXER_BATCH_TIMEOUT_USEC, NULL);

    /* Set all the necessary properties of the nvinfer element,
     * the necessary ones are : */
    g_object_set(G_OBJECT(pgie), "config-file-path", "dsdepth_pgie_config.txt",
                 NULL);
  }

  /* we add a message handler */
  bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
  bus_watch_id = gst_bus_add_watch(bus, bus_call, loop);
  gst_object_unref(bus);

  /* Set up the pipeline */
  /* we add all elements into the pipeline */
  gst_bin_add_many(GST_BIN(pipeline), source, h264parser, decoder, streammux,
                   pgie, sink, NULL);
  g_print("Added elements to bin\n");

  GstPad *sinkpad, *srcpad;
  gchar pad_name_sink[16] = "sink_0";
  gchar pad_name_src[16] = "src";

  sinkpad = gst_element_request_pad_simple(streammux, pad_name_sink);
  if (!sinkpad) {
    g_printerr("Streammux request sink pad failed. Exiting.\n");
    return -1;
  }

  srcpad = gst_element_get_static_pad(decoder, pad_name_src);
  if (!srcpad) {
    g_printerr("Decoder request src pad failed. Exiting.\n");
    return -1;
  }

  if (gst_pad_link(srcpad, sinkpad) != GST_PAD_LINK_OK) {
    g_printerr("Failed to link decoder to stream muxer. Exiting.\n");
    return -1;
  }

  gst_object_unref(sinkpad);
  gst_object_unref(srcpad);

  /* we link the elements together */
  /* file-source -> h264-parser -> nvv4l2-decoder ->
   * streammux -> pgie (nvinfer) -> video-renderer */

  if (!gst_element_link_many(source, h264parser, decoder, NULL)) {
    g_printerr("Elements could not be linked: 1. Exiting.\n");
    return -1;
  }

  if (!gst_element_link_many(streammux, pgie, sink, NULL)) {
    g_printerr("Elements could not be linked: 2. Exiting.\n");
    return -1;
  }

  /* Let's add a probe to get informed of the generated metadata. We add the
   * probe to the src pad of the nvinfer element, since by that time the
   * buffer has the tensor metadata attached. */
  pgie_src_pad = gst_element_get_static_pad(pgie, "src");
  if (!pgie_src_pad)
    g_print("Unable to get sink pad\n");
  else
    gst_pad_add_probe(pgie_src_pad, GST_PAD_PROBE_TYPE_BUFFER,
                      pgie_src_pad_buffer_probe, NULL, NULL);
  gst_object_unref(pgie_src_pad);

  /* Set the pipeline to "playing" state */
  g_print("Using file: %s\n", argv[1]);
  gst_element_set_state(pipeline, GST_STATE_PLAYING);

  /* Wait till pipeline encounters an error or EOS */
  g_print("Running...\n");
  g_main_loop_run(loop);

  /* Out of the main loop, clean up nicely */
  g_print("Returned, stopping playback\n");
  gst_element_set_state(pipeline, GST_STATE_NULL);
  g_print("Deleting pipeline\n");
  gst_object_unref(GST_OBJECT(pipeline));
  g_source_remove(bus_watch_id);
  g_main_loop_unref(loop);
  return 0;
}

The above code and configuration file only illustrate the process and may not be completely correct. Please debug them against your own model.

This is the result I got

Okay, I think I am getting closer. My output doesn't look anything close to yours though.

[The code posted here is identical to the example above.]


[property]
gpu-id=0
net-scale-factor=0.00392156862745098
onnx-file=Depth-Anything-V2.onnx
model-engine-file=depth-anything-v2.onnx_b1_gpu0_fp32.engine
batch-size=1
infer-dims=3;518;518
## 0=FP32, 1=INT8, 2=FP16 mode
network-mode=0
interval=0
gie-unique-id=1
#scaling-filter=0
#scaling-compute-hw=0
## 0=Detector, 1=Classifier, 2=Segmentation, 100=Other
network-type=100
# Enable tensor metadata output
# [batch, height, width]
output-tensor-meta=1
process-mode=1
model-color-format=0

Please check your model; this is not a DeepStream problem. I referred to the following project