I did some searching both before and after posting the original question to try to find examples. It seems like other developers may have seen the opportunity to take advantage of the Jetson product line ability to avoid the upload/download copying. However, I did not find any examples in the Nvidia forums but found one example here: Using OpenCV with CUDA on the Jetson TX2 - APIs - ximea support
Here is a simple example with upload/download working properly but copying from host to device and back again. The Jetson Nano (TX2, etc.) should be able to eliminate the upload/download steps:
//
// SimpleTest.cpp
// Loads an image, calls a GPU enabled function that uses opencv GPUMat upload/download
//
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <stdio.h>
// cuda stuff
#include <opencv2/cudaarithm.hpp>
// Nvidia cuda api
#include <cuda_runtime.h>
using namespace std;
cv::Mat testfunction(cv::Mat& h_original) {
// receives a CPU/host based image, converts it to GPU/device based image
// manipulates it, then converts back to CPU/host based result.
cv::Mat h_result (h_original.size(), h_original.type());
// create GPU/device images, same size and type as original host image
cv::cuda::GpuMat d_original(h_original.size(), h_original.type());
cv::cuda::GpuMat d_result(h_original.size(), h_original.type());
// upload the original image from host to device
d_original.upload(h_original);
// perform a GPU operation of some sort. Using threshold for simple placeholder
cv::cuda::threshold(d_original, d_result, 128.0, 255.0, cv::THRESH_BINARY);
// download the result image from device to host
d_result.download(h_result);
return h_result;
}
int main(int argc, char *argv[]) {
cv::namedWindow("original image", cv::WINDOW_AUTOSIZE);
cv::namedWindow("modified image", cv::WINDOW_AUTOSIZE );
cv::String filename = "./lena.jpg";
cv::Mat image, newimage;
image = cv::imread(filename);
if (image.empty()) {
cout << "could not open or find the image" << endl;
return -1;]
}
newimage = testfunction(image);
cv::imshow("original image", image);
cv::imshow("modified image", newimage);
cv::waitKey(0);
cv::destroyAllWindows();
return 0;
}
Below is an attempt to implement a similar example to the Ximea link. However, this code produces an error:
terminate called after throwing an instance of 'cv::Exception'
what(): OpenCV(3.4.6) [long path deleted...]/opencv-3.4.6/modules/cudev/include/opencv2/cudev/grid/detail/transform.hpp:315: error: (-217:Gpu API call) unspecified launch failure in function 'call'
Example (not working) code:
//
// SimpleTestGPU.cpp
// Loads an image, calls a GPU enabled function that uses opencv GPUMat
// but eliminates copying from CPU host to GPU host using upload/download
//
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>
#include <stdio.h>
// cuda stuff
#include <opencv2/cudaarithm.hpp>
// Nvidia cuda api
#include <cuda_runtime.h>
using namespace std;
cv::Mat testfunction(cv::Mat& h_original) {
// receives a CPU/host based image, but avoids copying with upload/download
// for the GPU manipulation. The result is returned in the CPU/host based h_result.
cv::Mat h_result (h_original.size(), h_original.type());
// Define pointers used to create GpuMat's
void *originalptr;
void *resultptr;
// Convert CPU/host original images to pointers
cudaHostGetDevicePointer(&originalptr, h_original.data, 0);
cudaHostGetDevicePointer(&resultptr, h_result.data, 0);
// Create GpuMats from the device pointers
cv::cuda::GpuMat d_original(h_original.size(), h_original.type(), originalptr);
cv::cuda::GpuMat d_result(h_result.size(), h_result.type(), resultptr);
// perform a GPU operation of some sort. Using threshold for simple placeholder
cv::cuda::threshold(d_original, d_result, 128.0, 255.0, cv::THRESH_BINARY);
// no need to download or copy the result image from device to host - it already
// resides in h_result.
return h_result;
}
int main(int argc, char *argv[]) {
cv::namedWindow("original image", cv::WINDOW_AUTOSIZE);
cv::namedWindow("modified image", cv::WINDOW_AUTOSIZE );
cv::String filename = "./lena.jpg";
cv::Mat image, newimage;
image = cv::imread(filename);
if (image.empty()) {
cout << "could not open or find the image" << endl;
return -1;
}
newimage = testfunction(image);
cv::imshow("original image", image);
cv::imshow("modified image", newimage);
cv::waitKey(0);
cv::destroyAllWindows();
return 0;
}
The method used to share the image.data does not work, but not sure why or what it should be. Any insights or suggestions would be greatly appreciated.