Please provide the following info (check/uncheck the boxes after creating this topic):

Software Version
- DRIVE OS Linux 5.2.6
- DRIVE OS Linux 5.2.6 and DriveWorks 4.0
- DRIVE OS Linux 5.2.0
- DRIVE OS Linux 5.2.0 and DriveWorks 3.5
- NVIDIA DRIVE™ Software 10.0 (Linux)
- NVIDIA DRIVE™ Software 9.0 (Linux)
- other DRIVE OS version
- other

Target Operating System
- Linux
- QNX
- other

Hardware Platform
- NVIDIA DRIVE™ AGX Xavier DevKit (E3550)
- NVIDIA DRIVE™ AGX Pegasus DevKit (E3550)
- other

SDK Manager Version
- 1.7.0.8846
- other

Host Machine Version
- native Ubuntu 18.04
- other
I am trying to composite 4 camera images into 1 image: 1920x1208 cameras tiled 2x2, so the resulting image is 3840x2416. Below is my kernel for mixing the images.
__global__ void kernel(uint16_t* dimage, size_t dpitch, const uint32_t dwidth, const uint32_t dheight,
                       uint16_t* simage1, uint16_t* simage2, uint16_t* simage3, uint16_t* simage4,
                       uint8_t size, size_t spitch, const uint32_t swidth, const uint32_t sheight)
{
    const uint32_t tidx = blockDim.x * blockIdx.x + threadIdx.x;
    const uint32_t tidy = blockDim.y * blockIdx.y + threadIdx.y;
    const uint32_t sIdx = (uint32_t)(tidy / sheight) * 2 + (uint32_t)(tidx / swidth); // 2 images per row

    if (tidx >= dwidth || tidy >= dheight || sIdx >= size) return;

    const uint32_t colIdx = tidx % swidth;
    const uint32_t rowIdx = tidy % sheight;

    if (sIdx == 0) {
        dimage[tidy*dpitch + 4*tidx+0] = simage1[rowIdx*spitch + 4*colIdx+0];
        dimage[tidy*dpitch + 4*tidx+1] = simage1[rowIdx*spitch + 4*colIdx+1];
        dimage[tidy*dpitch + 4*tidx+2] = simage1[rowIdx*spitch + 4*colIdx+2];
        dimage[tidy*dpitch + 4*tidx+3] = simage1[rowIdx*spitch + 4*colIdx+3];
    } else if (sIdx == 1) {
        dimage[tidy*dpitch + 4*tidx+0] = simage2[rowIdx*spitch + 4*colIdx+0];
        dimage[tidy*dpitch + 4*tidx+1] = simage2[rowIdx*spitch + 4*colIdx+1];
        dimage[tidy*dpitch + 4*tidx+2] = simage2[rowIdx*spitch + 4*colIdx+2];
        dimage[tidy*dpitch + 4*tidx+3] = simage2[rowIdx*spitch + 4*colIdx+3];
    } else if (sIdx == 2) {
        dimage[tidy*dpitch + 4*tidx+0] = simage3[rowIdx*spitch + 4*colIdx+0];
        dimage[tidy*dpitch + 4*tidx+1] = simage3[rowIdx*spitch + 4*colIdx+1];
        dimage[tidy*dpitch + 4*tidx+2] = simage3[rowIdx*spitch + 4*colIdx+2];
        dimage[tidy*dpitch + 4*tidx+3] = simage3[rowIdx*spitch + 4*colIdx+3];
    } else {
        dimage[tidy*dpitch + 4*tidx+0] = simage4[rowIdx*spitch + 4*colIdx+0];
        dimage[tidy*dpitch + 4*tidx+1] = simage4[rowIdx*spitch + 4*colIdx+1];
        dimage[tidy*dpitch + 4*tidx+2] = simage4[rowIdx*spitch + 4*colIdx+2];
        dimage[tidy*dpitch + 4*tidx+3] = simage4[rowIdx*spitch + 4*colIdx+3];
    }
}
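As far as I understand, dwImageCUDA::pitch[0] is a byte pitch (same convention as CUDA pitched allocations) and DW_IMAGE_FORMAT_RAW_UINT16 is a single-channel format, so a pitch-aware way to address one pixel would look roughly like the sketch below. This is only to illustrate my understanding of the indexing; it is not the code I currently run.

// Sketch only: pitch-aware addressing for a single-channel RAW_UINT16 image,
// assuming the pitch values are in bytes (CUDA pitched-allocation convention).
__device__ inline void copyRaw16Pixel(uint16_t* dimage, size_t dpitchBytes,
                                      const uint16_t* simage, size_t spitchBytes,
                                      uint32_t dstX, uint32_t dstY,
                                      uint32_t srcX, uint32_t srcY)
{
    // step rows in bytes, then index columns as uint16_t elements (no x4 per-pixel stride)
    uint16_t* dstRow = reinterpret_cast<uint16_t*>(reinterpret_cast<uint8_t*>(dimage) + dstY * dpitchBytes);
    const uint16_t* srcRow = reinterpret_cast<const uint16_t*>(reinterpret_cast<const uint8_t*>(simage) + srcY * spitchBytes);
    dstRow[dstX] = srcRow[srcX];
}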
My kernel call:
uint32_t iDivUp(const uint32_t a, const uint32_t b)
{
    return ((a % b) != 0U) ? ((a / b) + 1U) : (a / b);
}

void generateImage(dwImageHandle_t dimage, dwImageHandle_t *simage, const uint8_t size)
{
    dwImageProperties dprop, sprop;
    dwImage_getProperties(&dprop, dimage);
    dwImage_getProperties(&sprop, *simage);

    if (dprop.format != DW_IMAGE_FORMAT_RAW_UINT16 || sprop.format != DW_IMAGE_FORMAT_RAW_UINT16) {
        throw std::runtime_error("unsupported format in image samples generateImage");
    }

    if (dprop.type == DW_IMAGE_CUDA && sprop.type == DW_IMAGE_CUDA) {
        dwImageCUDA *dimgCUDA, *simgCUDA;
        dwImage_getCUDA(&dimgCUDA, dimage);

        uint16_t **simage_ = (uint16_t **) malloc(sizeof(uint16_t *) * size);
        for (uint8_t i = 0; i < size; ++i) {
            dwImage_getCUDA(&simgCUDA, simage[i]);
            simage_[i] = static_cast<uint16_t*>(simgCUDA->dptr[0]);
        }

        dim3 numThreads = dim3(32, 4, 1);
        kernel<<<dim3(iDivUp(dprop.width, numThreads.x),
                      iDivUp(dprop.height, numThreads.y)),
                 numThreads>>>(static_cast<uint16_t*>(dimgCUDA->dptr[0]), dimgCUDA->pitch[0], dprop.width, dprop.height,
                               simage_[0], simage_[1], simage_[2], simage_[3],
                               size, simgCUDA->pitch[0], sprop.width, sprop.height);

        free(simage_);

        dwTime_t timestamp;
        dwImage_getTimestamp(&timestamp, *simage);
        dwImage_setTimestamp(timestamp, dimage);
    } else {
        throw std::runtime_error("unsupported type in image samples generateImage");
    }
}
Both the kernel and the kernel call are adapted from the image_streamer_simple sample.
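For debugging, a standard CUDA runtime check right after the <<<...>>> launch would look like the helper below (sketch only, not part of the code above; checkKernelLaunch is a name I made up).

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

// Sketch: surface launch/execution errors of the compositing kernel right away.
static void checkKernelLaunch(const char* what)
{
    cudaError_t err = cudaGetLastError();   // configuration / launch errors
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();      // execution errors (e.g. out-of-bounds writes)
    if (err != cudaSuccess)
        throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(err));
}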
Initializing the SoftISP:
m_ispOutput = DW_SOFTISP_PROCESS_TYPE_DEMOSAIC | DW_SOFTISP_PROCESS_TYPE_TONEMAP;

//------------------------------------------------------------------------------
// initializes the software ISP for processing RAW RCCB images from the SensorCamera module
//------------------------------------------------------------------------------
{
    dwSoftISPParams softISPParams;
    // CHECK_DW_ERROR(dwSoftISP_initParamsFromCamera(&softISPParams, &m_cameraProperties));
    softISPParams.cameraType = DW_CAMERA_GENERIC;
    softISPParams.cameraRawFormat = DW_CAMERA_RAW_FORMAT_CRBC; // format => CRBC
    softISPParams.cameraRevision = 0;
    softISPParams.method = DW_TONEMAP_METHOD_AGTM;
    softISPParams.width = 3840;
    softISPParams.height = 2416;
    CHECK_DW_ERROR(dwSoftISP_initialize(&m_isp, &softISPParams, m_sdk));
    CHECK_DW_ERROR(dwSoftISP_setCUDAStream(m_stream, m_isp));
    //CHECK_DW_ERROR(dwSoftISP_setDemosaicMethod(DW_SOFTISP_DEMOSAIC_METHOD_DOWNSAMPLE, m_isp));
    //CHECK_DW_ERROR(dwSoftISP_setDenoiseMethod(DW_SOFTISP_DENOISE_METHOD_NONE, m_isp));
    CHECK_DW_ERROR(dwSoftISP_setDemosaicMethod(DW_SOFTISP_DEMOSAIC_METHOD_INTERPOLATION, m_isp));
    CHECK_DW_ERROR(dwSoftISP_setDenoiseMethod(DW_SOFTISP_DENOISE_METHOD_BILATERAL, m_isp));

    // we need to allocate memory for a demosaic image and bind it to the ISP
    dwImageProperties rcbProperties{};
    if (m_ispOutput & DW_SOFTISP_PROCESS_TYPE_DEMOSAIC) {
        // getting the properties directly from the ISP
        CHECK_DW_ERROR(dwSoftISP_getDemosaicImageProperties(&rcbProperties, m_isp));
        CHECK_DW_ERROR(dwImage_create(&m_rcbImage, rcbProperties, m_sdk));
        CHECK_DW_ERROR(dwImage_getCUDA(&m_rcbCUDAImage, m_rcbImage));
        // bind the image as the output of the demosaic process to the ISP;
        // it will be filled by the call to dwSoftISP_processDeviceAsync
        CHECK_DW_ERROR(dwSoftISP_bindOutputDemosaic(m_rcbCUDAImage, m_isp));
    }

    // in order to visualize, we prepare the properties of the tonemapped image
    dwImageProperties rgbaImageProperties = rcbProperties;
    rgbaImageProperties.format = DW_IMAGE_FORMAT_RGBA_UINT8;
    rgbaImageProperties.type = DW_IMAGE_CUDA;

    dwImageProperties mixImageProperties = rgbaImageProperties;
    mixImageProperties.format = DW_IMAGE_FORMAT_RAW_UINT16;
    mixImageProperties.type = DW_IMAGE_CUDA;
    CHECK_DW_ERROR(dwImage_create(&m_mixImage, mixImageProperties, m_sdk));

    // allocate the rgba image
    CHECK_DW_ERROR(dwImage_create(&m_rgbaImage, rgbaImageProperties, m_sdk));
    CHECK_DW_ERROR(dwImage_getCUDA(&m_rgbaCUDAImage, m_rgbaImage));
    CHECK_DW_ERROR(dwSoftISP_bindOutputTonemap(m_rgbaCUDAImage, m_isp));

    CHECK_DW_ERROR(dwImageStreamer_initialize(&m_streamerCUDAtoCPU, &rgbaImageProperties, DW_IMAGE_CPU, m_sdk));
}
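My understanding is that dwImageStreamer_producerSend expects the sent image to match the properties the streamer was initialized with, so a check like the one below before sending could help narrow the failure down (sketch only; sendProps is a local name I made up, m_rgbaImage and rgbaImageProperties are the ones from the block above).

// Sketch: compare the image actually sent with the properties the streamer was created with.
dwImageProperties sendProps{};
CHECK_DW_ERROR(dwImage_getProperties(&sendProps, m_rgbaImage));
if (sendProps.width  != rgbaImageProperties.width  ||
    sendProps.height != rgbaImageProperties.height ||
    sendProps.format != rgbaImageProperties.format ||
    sendProps.type   != rgbaImageProperties.type) {
    printf("streamer/image property mismatch: %ux%u fmt %d vs %ux%u fmt %d\n",
           sendProps.width, sendProps.height, (int)sendProps.format,
           rgbaImageProperties.width, rgbaImageProperties.height, (int)rgbaImageProperties.format);
}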
Reading frames from the 4 cameras and calling the kernel:
static gboolean
read_frame(gpointer data)
{
    dwStatus status = DW_NOT_READY;

    for (uint8_t cameraPort = 0; cameraPort < MAX_PORTS_COUNT; ++cameraPort) {
        if (m_activeCamerasPerPort[cameraPort] == 0) continue;

        for (uint8_t cameraSiblingID = 0; cameraSiblingID < m_activeCamerasPerPort[cameraPort]; ++cameraSiblingID) {
            // return the previous frame
            if (m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]) {
                status = dwSensorCamera_returnFrame(&m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]);
                if (status != DW_SUCCESS)
                    return false;
            }

            // read from the camera
            status = dwSensorCamera_readFrame(&m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID], cameraSiblingID, timeout, m_camera[cameraPort]);
            if (status != DW_SUCCESS)
                return false;

            // get an image with the desired output format (async operation)
            status = dwSensorCamera_getImageAsync(&m_frameImage[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID], DW_CAMERA_OUTPUT_CUDA_RAW_UINT16, m_frame[cameraPort*MAX_SIBLINGS_COUNT + cameraSiblingID]);
            if (status != DW_SUCCESS)
                return false;
        }
    }

    CHECK_CUDA_ERROR(cudaStreamSynchronize(m_stream));

    if (status == DW_SUCCESS) {
        dwImageCUDA *imgCUDA;
        dwImage_getCUDA(&imgCUDA, m_mixImage);
        printf("generating image %x\n", static_cast<uint16_t*>(imgCUDA->dptr[0]));
        generateImage(m_mixImage, (dwImageHandle_t *)&m_frameImage[0], m_activeCameras);
        printf("generated image %x\n", static_cast<uint16_t*>(imgCUDA->dptr[0]));
    }

    return status == DW_SUCCESS;
}
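One thing I am not sure about is passing &m_frameImage[0] directly to generateImage: with the cameraPort*MAX_SIBLINGS_COUNT indexing, the four active frames are only contiguous when they all sit on port 0. A packed copy along the lines of this sketch would avoid that assumption (activeImages is a name I introduced here):

#include <vector>

// Sketch: gather only the active frame images into a packed array before compositing,
// so generateImage() does not depend on the port*MAX_SIBLINGS_COUNT layout.
std::vector<dwImageHandle_t> activeImages;
activeImages.reserve(m_activeCameras);
for (uint8_t port = 0; port < MAX_PORTS_COUNT; ++port) {
    for (uint8_t sib = 0; sib < m_activeCamerasPerPort[port]; ++sib) {
        activeImages.push_back(m_frameImage[port * MAX_SIBLINGS_COUNT + sib]);
    }
}
if (activeImages.size() == m_activeCameras) {
    generateImage(m_mixImage, activeImages.data(), static_cast<uint8_t>(activeImages.size()));
}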
Sending the mixed image to the SoftISP to get the tonemapped output and stream it to the CPU:
if (!read_frame(NULL)) {
    printf("read_frame returned false\n");
    buffer = gst_buffer_new_allocate (NULL, sizeof(guint8) * size, NULL);

    /* increment the timestamp every 1/30 second */
    GST_BUFFER_PTS (buffer) = ctx->timestamp;
    GST_BUFFER_DURATION (buffer) = gst_util_uint64_scale_int (1, GST_SECOND, 30);
    ctx->timestamp += GST_BUFFER_DURATION (buffer);

    g_signal_emit_by_name (appsrc, "push-buffer", buffer, &ret);
    gst_buffer_unref (buffer);
    return;
}

CHECK_CUDA_ERROR(cudaStreamSynchronize(m_stream));

dwImageProperties prop;
dwImage_getProperties(&prop, m_mixImage);

dwTime_t timestamp;
dwImage_getTimestamp(&timestamp, m_mixImage);
printf("timestamp %ld format %d type %d width %d height %d memoryLayout %d\n", timestamp, prop.format, prop.type, prop.width, prop.height, prop.memoryLayout);

// raw images need to be processed through the softISP
dwImageCUDA* rawImageCUDA;
CHECK_DW_ERROR(dwImage_getCUDA(&rawImageCUDA, m_mixImage));
CHECK_DW_ERROR(dwSoftISP_bindInputRaw(rawImageCUDA, m_isp));

// request the softISP to perform a demosaic and a tonemap. This is for demonstration purposes;
// the demosaic output will not be used here, only the tonemap output
CHECK_DW_ERROR(dwSoftISP_setProcessType(m_ispOutput, m_isp));
CHECK_DW_ERROR(dwSoftISP_processDeviceAsync(m_isp));

// stream the tonemapped image to the CPU domain
CHECK_DW_ERROR(dwImageStreamer_producerSend(m_rgbaImage, m_streamerCUDAtoCPU));

// receive the streamed image as a handle
dwImageHandle_t frameCPU;
CHECK_DW_ERROR(dwImageStreamer_consumerReceive(&frameCPU, timeout, m_streamerCUDAtoCPU));

// get an image from the frame
dwImageCPU *imgCPU;
CHECK_DW_ERROR(dwImage_getCPU(&imgCPU, frameCPU));

guint8 *data_cp = (guint8 *) malloc(sizeof(guint8) * size);
memcpy(data_cp, imgCPU->data[0], sizeof(guint8) * size);
buffer = gst_buffer_new_wrapped (data_cp, sizeof(guint8) * size);

/* increment the timestamp every 1/30 second */
GST_BUFFER_PTS (buffer) = ctx->timestamp;
GST_BUFFER_DURATION (buffer) = gst_util_uint64_scale_int (1, GST_SECOND, 30);
ctx->timestamp += GST_BUFFER_DURATION (buffer);

g_signal_emit_by_name (appsrc, "push-buffer", buffer, &ret);
gst_buffer_unref (buffer);

// return the consumed image
CHECK_DW_ERROR(dwImageStreamer_consumerReturn(&frameCPU, m_streamerCUDAtoCPU));

// notify the producer that the work is done
CHECK_DW_ERROR(dwImageStreamer_producerReturn(nullptr, timeout, m_streamerCUDAtoCPU));
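The memcpy above assumes the CPU image rows are tightly packed; if imgCPU->pitch[0] turns out to be larger than width*4, a row-by-row copy like this sketch would be needed instead (assuming RGBA8 output, i.e. 4 bytes per pixel, tightly packed in the destination buffer):

// Sketch: row-by-row copy that respects the CPU image pitch, in case rows are padded.
const uint32_t rowBytes = imgCPU->prop.width * 4;
for (uint32_t row = 0; row < imgCPU->prop.height; ++row) {
    memcpy(data_cp + row * rowBytes,
           imgCPU->data[0] + row * imgCPU->pitch[0],
           rowBytes);
}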
This is the error:
parameters output-format=raw+data,camera-type=ar0231-rccb-bae-sf3324,camera-group=a,camera-count=4,format=raw,fifo-size=3,slave=0
nvmedia isc: GetCameraPowerControlLevel: 936: Camera power control library: NVCCP
Max96712 Rev 2 detected!
MAX96712: Enable periodic AEQ on Link 0
MAX96712: Enable periodic AEQ on Link 1
MAX96712: Enable periodic AEQ on Link 2
MAX96712: Enable periodic AEQ on Link 3
MAX96705: Pre-emphasis set to 0xaa
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
Sensor AR0231 RCCB Rev7 detected!
stream ready at rtsp://0.0.0.0:8554/video
w 3840 h 2416
generating image d6150000
generated image d6150000
timestamp 3493126299 format 4000 type 2 width 3840 height 2416 memoryLayout 1
terminate called after throwing an instance of 'std::runtime_error'
what(): [2021-11-11 08:06:02] DW Error DW_INTERNAL_ERROR executing DW function:
dwImageStreamer_producerSend(m_rgbaImage, m_streamerCUDAtoCPU)
at /home/nvidia/my_ws/src/camera_gmsl/main.cpp:236
Aborted (core dumped)
Thanks in advance for your help!