DW_INTERNAL_ERROR on dwImageStreamer_producerSend call

Please provide the following info (check/uncheck the boxes after creating this topic):
Software Version
DRIVE OS Linux 5.2.6
DRIVE OS Linux 5.2.6 and DriveWorks 4.0
DRIVE OS Linux 5.2.0
DRIVE OS Linux 5.2.0 and DriveWorks 3.5
NVIDIA DRIVE™ Software 10.0 (Linux)
NVIDIA DRIVE™ Software 9.0 (Linux)
other DRIVE OS version
other

Target Operating System
Linux
QNX
other

Hardware Platform
NVIDIA DRIVE™ AGX Xavier DevKit (E3550)
NVIDIA DRIVE™ AGX Pegasus DevKit (E3550)
other

SDK Manager Version
1.7.0.8846
other

Host Machine Version
native Ubuntu 18.04
other

I am trying to composite 4 camera images onto 1 image, 1920x1208 resolution cameras tiled 2x2 so the resulting image is 3840x2416. Below is my kernel for mixing images.

__global__ void kernel(uint16_t* dimage, size_t dpitch, const uint32_t dwidth, const uint32_t dheight,
                       uint16_t* simage1, uint16_t* simage2, uint16_t* simage3, uint16_t* simage4, uint8_t size, size_t spitch, const uint32_t swidth, const uint32_t sheight)
{
    const uint32_t tidx = blockDim.x * blockIdx.x + threadIdx.x;
    const uint32_t tidy = blockDim.y * blockIdx.y + threadIdx.y;
    const uint32_t sIdx = (uint32_t)(tidy / sheight)*2 + (uint32_t)(tidx / swidth); // 2 image per row

    if (tidx >= dwidth || tidy >= dheight || sIdx >= size) return;

    const uint32_t colIdx = tidx % swidth;
    const uint32_t rowIdx = tidy % sheight;

    if (sIdx == 0) {
      dimage[tidy*dpitch + 4*tidx+0] = simage1[rowIdx*spitch + 4*colIdx+0];
      dimage[tidy*dpitch + 4*tidx+1] = simage1[rowIdx*spitch + 4*colIdx+1];
      dimage[tidy*dpitch + 4*tidx+2] = simage1[rowIdx*spitch + 4*colIdx+2];
      dimage[tidy*dpitch + 4*tidx+3] = simage1[rowIdx*spitch + 4*colIdx+3];
    } else if (sIdx == 1) {
      dimage[tidy*dpitch + 4*tidx+0] = simage2[rowIdx*spitch + 4*colIdx+0];
      dimage[tidy*dpitch + 4*tidx+1] = simage2[rowIdx*spitch + 4*colIdx+1];
      dimage[tidy*dpitch + 4*tidx+2] = simage2[rowIdx*spitch + 4*colIdx+2];
      dimage[tidy*dpitch + 4*tidx+3] = simage2[rowIdx*spitch + 4*colIdx+3];
    } else if (sIdx == 2) {
      dimage[tidy*dpitch + 4*tidx+0] = simage3[rowIdx*spitch + 4*colIdx+0];
      dimage[tidy*dpitch + 4*tidx+1] = simage3[rowIdx*spitch + 4*colIdx+1];
      dimage[tidy*dpitch + 4*tidx+2] = simage3[rowIdx*spitch + 4*colIdx+2];
      dimage[tidy*dpitch + 4*tidx+3] = simage3[rowIdx*spitch + 4*colIdx+3];
    } else {
      dimage[tidy*dpitch + 4*tidx+0] = simage4[rowIdx*spitch + 4*colIdx+0];
      dimage[tidy*dpitch + 4*tidx+1] = simage4[rowIdx*spitch + 4*colIdx+1];
      dimage[tidy*dpitch + 4*tidx+2] = simage4[rowIdx*spitch + 4*colIdx+2];
      dimage[tidy*dpitch + 4*tidx+3] = simage4[rowIdx*spitch + 4*colIdx+3];
    }
}

My kernel call

uint32_t iDivUp(const uint32_t a, const uint32_t b)
{
    return ((a % b) != 0U) ? ((a / b) + 1U) : (a / b);
}

void generateImage(dwImageHandle_t dimage, dwImageHandle_t *simage, const uint8_t size)
{
    dwImageProperties dprop, sprop;
    dwImage_getProperties(&dprop, dimage);
    dwImage_getProperties(&sprop, *simage);
    if (dprop.format != DW_IMAGE_FORMAT_RAW_UINT16 || sprop.format != DW_IMAGE_FORMAT_RAW_UINT16) {
        throw std::runtime_error("unsupported format in image samples generateImage");
    }

    if (dprop.type == DW_IMAGE_CUDA && sprop.type == DW_IMAGE_CUDA) {
        dwImageCUDA *dimgCUDA, *simgCUDA;
        dwImage_getCUDA(&dimgCUDA, dimage);
        uint16_t **simage_ = (uint16_t **) malloc(sizeof(uint16_t *) * size);
        for (uint8_t i = 0; i < size; ++i) {
          dwImage_getCUDA(&simgCUDA, simage[i]);
          simage_[i] = static_cast<uint16_t*>(simgCUDA->dptr[0]);
        }
        dim3 numThreads = dim3(32, 4, 1);
        kernel <<<dim3(iDivUp(dprop.width, numThreads.x),
                       iDivUp(dprop.height, numThreads.y)),
                numThreads >>>(static_cast<uint16_t*>(dimgCUDA->dptr[0]), dimgCUDA->pitch[0], dprop.width, dprop.height,
                               static_cast<uint16_t*>(simage_[0]), static_cast<uint16_t*>(simage_[1]), static_cast<uint16_t*>(simage_[2]), static_cast<uint16_t*>(simage_[3]), size, simgCUDA->pitch[0], sprop.wid$
        dwTime_t timestamp;
        dwImage_getTimestamp(&timestamp, *simage);
        dwImage_setTimestamp(timestamp, dimage);
    } else {
        throw std::runtime_error("unsupported type in image samples generateImage");
    }
}

Both the kernel and kernel call are from example image_streamer_simple.

Initializing SoftISP

  m_ispOutput = DW_SOFTISP_PROCESS_TYPE_DEMOSAIC | DW_SOFTISP_PROCESS_TYPE_TONEMAP;

  //------------------------------------------------------------------------------
  // initializes software ISP for processing RAW RCCB images
  // - the SensorCamera module
  // -----------------------------------------
  {
    dwSoftISPParams softISPParams;
//    CHECK_DW_ERROR(dwSoftISP_initParamsFromCamera(&softISPParams, &m_cameraProperties));
    softISPParams.cameraType = DW_CAMERA_GENERIC;
    softISPParams.cameraRawFormat = DW_CAMERA_RAW_FORMAT_CRBC;
    softISPParams.cameraRevision = 0;
    softISPParams.method = DW_TONEMAP_METHOD_AGTM;
    softISPParams.width  = 3840;
    softISPParams.height = 2416;
    // format => CRBC
    CHECK_DW_ERROR(dwSoftISP_initialize(&m_isp, &softISPParams, m_sdk));
    CHECK_DW_ERROR(dwSoftISP_setCUDAStream(m_stream, m_isp));
    //CHECK_DW_ERROR(dwSoftISP_setDemosaicMethod(DW_SOFTISP_DEMOSAIC_METHOD_DOWNSAMPLE, m_isp));
    //CHECK_DW_ERROR(dwSoftISP_setDenoiseMethod(DW_SOFTISP_DENOISE_METHOD_NONE, m_isp));
    CHECK_DW_ERROR(dwSoftISP_setDemosaicMethod(DW_SOFTISP_DEMOSAIC_METHOD_INTERPOLATION, m_isp));
    CHECK_DW_ERROR(dwSoftISP_setDenoiseMethod(DW_SOFTISP_DENOISE_METHOD_BILATERAL, m_isp));

    // we need to allocate memory for a demosaic image and bind it to the ISP
    dwImageProperties rcbProperties{};
    if (m_ispOutput & DW_SOFTISP_PROCESS_TYPE_DEMOSAIC) {
      // getting the properties directly from the ISP
      CHECK_DW_ERROR(dwSoftISP_getDemosaicImageProperties(&rcbProperties, m_isp));
      CHECK_DW_ERROR(dwImage_create(&m_rcbImage, rcbProperties, m_sdk));
      CHECK_DW_ERROR(dwImage_getCUDA(&m_rcbCUDAImage, m_rcbImage));
      // bind the image as the output for demosaic process to the ISP, will be filled at the call of
      // dwSoftISP_processDeviceAsync
      CHECK_DW_ERROR(dwSoftISP_bindOutputDemosaic(m_rcbCUDAImage, m_isp));
    }

    // in order to visualize we prepare the properties of the tonemapped image
    dwImageProperties rgbaImageProperties = rcbProperties;
    rgbaImageProperties.format = DW_IMAGE_FORMAT_RGBA_UINT8;
    rgbaImageProperties.type = DW_IMAGE_CUDA;

    dwImageProperties mixImageProperties = rgbaImageProperties;
    mixImageProperties.format = DW_IMAGE_FORMAT_RAW_UINT16;
    mixImageProperties.type = DW_IMAGE_CUDA;
    CHECK_DW_ERROR(dwImage_create(&m_mixImage, mixImageProperties, m_sdk));

    // allocate the rgba image
    CHECK_DW_ERROR(dwImage_create(&m_rgbaImage, rgbaImageProperties, m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&m_rgbaCUDAImage, m_rgbaImage));
    CHECK_DW_ERROR(dwSoftISP_bindOutputTonemap(m_rgbaCUDAImage, m_isp));
    CHECK_DW_ERROR(dwImageStreamer_initialize(&m_streamerCUDAtoCPU, &rgbaImageProperties, DW_IMAGE_CPU, m_sdk));
  }

reading frames from 4 cameras and calling kernel

static gboolean
read_frame(gpointer data)
{
  dwStatus status = DW_NOT_READY;

  for (uint8_t cameraPort = 0; cameraPort < MAX_PORTS_COUNT; ++cameraPort) {
    if (m_activeCamerasPerPort[cameraPort] == 0) continue;

    for (uint8_t cameraSiblingID = 0; cameraSiblingID < m_activeCamerasPerPort[cameraPort]; ++cameraSiblingID) {
      // return frame
      if (m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]) {
        status = dwSensorCamera_returnFrame(&m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]);
        if (status != DW_SUCCESS) 
          return false;
      }

      // read from camera
      status = dwSensorCamera_readFrame(&m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID], cameraSiblingID, timeout, m_camera[cameraPort]);
      if (status != DW_SUCCESS)
        return false;

      // get an image with the desired output format (async operation)
      dwSensorCamera_getImageAsync(&m_frameImage[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID], DW_CAMERA_OUTPUT_CUDA_RAW_UINT16, m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]);
      if (status != DW_SUCCESS) 
        return false;
    }
  }

  CHECK_CUDA_ERROR(cudaStreamSynchronize(m_stream));

  if (status == DW_SUCCESS) {
    dwImageCUDA *imgCUDA;
    dwImage_getCUDA(&imgCUDA, m_mixImage);
    printf("generating image %x\n", static_cast<uint16_t*>(imgCUDA->dptr[0]));
    generateImage(m_mixImage, (dwImageHandle_t *)&m_frameImage[0], m_activeCameras);
    printf("generated image %x\n", static_cast<uint16_t*>(imgCUDA->dptr[0]));
  }


  return status == DW_SUCCESS;
}

sending mixed image to SoftISP to get the tonemap and stream it to the cpu

  if (!read_frame(NULL)) {
    printf("read_frame returned false\n");

    buffer = gst_buffer_new_allocate (NULL, sizeof(guint8) * size, NULL);

    /* increment the timestamp every 1/30 second */
    GST_BUFFER_PTS (buffer) = ctx->timestamp;
    GST_BUFFER_DURATION (buffer) = gst_util_uint64_scale_int (1, GST_SECOND, 30);
    ctx->timestamp += GST_BUFFER_DURATION (buffer);

    g_signal_emit_by_name (appsrc, "push-buffer", buffer, &ret);
    gst_buffer_unref (buffer);

    return;
  }

  CHECK_CUDA_ERROR(cudaStreamSynchronize(m_stream));

  dwImageProperties prop;
  dwImage_getProperties(&prop, m_mixImage);
  dwTime_t timestamp;
  dwImage_getTimestamp(&timestamp, m_mixImage);
  printf("timestamp %u format %d type %d width %d height %d memoryLayout %d\n", timestamp, prop.format, prop.type, prop.width, prop.height, prop.memoryLayout);

  // raw images need to be processed through the softISP
  dwImageCUDA* rawImageCUDA;
  CHECK_DW_ERROR(dwImage_getCUDA(&rawImageCUDA, m_mixImage));
  CHECK_DW_ERROR(dwSoftISP_bindInputRaw(rawImageCUDA, m_isp));
  // request the softISP to perform a demosaic and a tonemap. This is for edmonstration purposes, the demosaic
  // output will not be used in this sample, only the tonemap output
  CHECK_DW_ERROR(dwSoftISP_setProcessType(m_ispOutput, m_isp));
  CHECK_CUDA_ERROR(dwSoftISP_processDeviceAsync(m_isp));

  // stream that tonemap image to the CPU domain
  CHECK_DW_ERROR(dwImageStreamer_producerSend(m_rgbaImage, m_streamerCUDAtoCPU));

  // receive the streamed image as a handle
  dwImageHandle_t frameCPU;
  CHECK_DW_ERROR(dwImageStreamer_consumerReceive(&frameCPU, timeout, m_streamerCUDAtoCPU));

  // get an image from the frame
  dwImageCPU *imgCPU;
  CHECK_DW_ERROR(dwImage_getCPU(&imgCPU, frameCPU));

  guint8 *data_cp = (guint8 *) malloc(sizeof(guint8) * size);

  memcpy(data_cp, imgCPU->data[0], sizeof(guint8) * size);

  buffer = gst_buffer_new_wrapped (data_cp, sizeof(guint8) * size);

  /* increment the timestamp every 1/30 second */
  GST_BUFFER_PTS (buffer) = ctx->timestamp;
  GST_BUFFER_DURATION (buffer) = gst_util_uint64_scale_int (1, GST_SECOND, 30);
  ctx->timestamp += GST_BUFFER_DURATION (buffer);

  g_signal_emit_by_name (appsrc, "push-buffer", buffer, &ret);
  gst_buffer_unref (buffer);

  // returned the consumed image
  CHECK_DW_ERROR(dwImageStreamer_consumerReturn(&frameCPU, m_streamerCUDAtoCPU));

  // notify the producer that the work is done
  CHECK_DW_ERROR(dwImageStreamer_producerReturn(nullptr, timeout, m_streamerCUDAtoCPU));

This is the error

parameters output-format=raw+data,camera-type=ar0231-rccb-bae-sf3324,camera-group=a,camera-count=4,format=raw,fifo-size=3,slave=0
nvmedia isc: GetCameraPowerControlLevel: 936: Camera power control library: NVCCP
Max96712 Rev 2 detected!
MAX96712: Enable periodic AEQ on Link 0
MAX96712: Enable periodic AEQ on Link 1
MAX96712: Enable periodic AEQ on Link 2
MAX96712: Enable periodic AEQ on Link 3
MAX96705: Pre-emphasis set to 0xaa
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
stream ready at rtsp://0.0.0.0:8554/video
w 3840 h 2416
generating image d6150000
generated image d6150000
timestamp 3493126299 format 4000 type 2 width 3840 height 2416 memoryLayout 1
terminate called after throwing an instance of 'std::runtime_error'
  what():  [2021-11-11 08:06:02] DW Error DW_INTERNAL_ERROR executing DW function:
 dwImageStreamer_producerSend(m_rgbaImage, m_streamerCUDAtoCPU)
 at /home/nvidia/my_ws/src/camera_gmsl/main.cpp:236
Aborted (core dumped)

Thanks in advance for your help!

Dear @mkahra14,
Do you see no issue when you comment big image generation code and testing the image streamer workflow?

Sorry @SivaRamaKrishnaNV , I didn’t quite understand your question. Earlier I was mixing camera images on the CPU but it was slow, so I came up with this code.

I have solved the problem, as the image format is RAW_UINT16 i thought that the type for each element was uint16 but it turns out it should be uint8_t.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.