Hello,
I am working on NVIDIA Jetson Orin NX 16GB, Jetpack 5.1.1, L4T 35.3.1, VPI 2.2.7.
I have an image of width 4112 and height 2176 and am trying to calculate its histogram using VPI with the CUDA backend, but vpiSubmitHistogram returns 1. If I change VPI_BACKEND_CUDA to VPI_BACKEND_CPU, or use a 1920x1080 input image instead, everything works fine. However, I need to compute the histogram of the 4112x2176 image with VPI_BACKEND_CUDA.
My sample:
#include <opencv2/core.hpp>
#include <iostream>
#include <vpi/OpenCVInterop.hpp>
#include <vpi/Types.h>
#include <vpi/ArrayType.h>
#include <vpi/Array.h>
#include <vpi/Stream.h>
#include <vpi/algo/Histogram.h>
#include <numeric>
using namespace cv;
using namespace std;
using namespace nv;
int main()
{
int numBins = 256;
int start = 0;
int end = 256;
int width = 4112;
int height = 2176;
vector<uint8_t> pixels( width * height, 1 );
pixels[0] = 0;
pixels[5] = 0;
pixels[10] = 0;
pixels[11] = 255;
pixels[12] = 3;
pixels[13] = 5;
Mat opencv_image( height, width, CV_8UC1, pixels.data() );
cout << "image channels " << opencv_image.channels() << endl;
cout << "size " << opencv_image.rows << " " << opencv_image.cols << endl;
VPIStream stream;
cout << "\n create stream " << vpiStreamCreate( 0, &stream );
VPIImage image;
cout << "\n import opencv image "
<< vpiImageCreateWrapperOpenCVMat( opencv_image, VPI_IMAGE_FORMAT_U8, 0, &image );
VPIArray output;
cout << "\n create output histogram " << vpiArrayCreate( numBins, VPI_ARRAY_TYPE_U32, 0, &output );
VPIPayload payload;
cout << "\n create payload "
<< vpiCreateHistogramEven( VPI_BACKEND_CUDA, VPI_IMAGE_FORMAT_U8, start, end, 256, &payload );
if( auto error = vpiSubmitHistogram( stream, VPI_BACKEND_CUDA, payload, image, output, 0 ) )
{
cout << "\n [ERROR] error in submit histogram " << error << endl;
}
if( auto error = vpiStreamSync( stream ) )
{
cout << "\n [ERROR] error in stream sync " << error << endl;
}
VPIArrayData output_data;
cout << "\n lock array " << vpiArrayLock( output, VPI_LOCK_READ_WRITE );
cout << "\n array lock data "
<< vpiArrayLockData( output, VPI_LOCK_READ_WRITE, VPI_ARRAY_BUFFER_HOST_AOS, &output_data );
cout << "\n array size " << *output_data.buffer.aos.sizePointer << "\n";
int sum = 0;
for( int i = 0; i < 256; ++i )
{
sum += ( ( static_cast<uint32_t*>( output_data.buffer.aos.data ) )[i] );
cout << ( ( static_cast<uint32_t*>( output_data.buffer.aos.data ) )[i] ) << " ";
}
cout << "\n sum " << sum << endl;
cout << "\n unlock " << vpiArrayUnlock( output ) << endl;
vpiStreamDestroy(stream);
vpiImageDestroy(image);
vpiArrayDestroy(output);
vpiPayloadDestroy(payload);
return 0;
}