Hello, Developers!
I am the beginner of cuda programming and i have to learn very fast as i can.
following code is my original code of opencv 4.0.0 with viusal studio 2017 in release mode.
(debug mode cannot open images.)
#include <iostream>
#include <opencv.hpp>
#include <opencv2/opencv.hpp>
#include <Windows.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <opencv2/imgcodecs/imgcodecs.hpp>
using namespace std;
using namespace cv;
__global__ void imageSubtract(Mat *img1, Mat *img2, Mat *diff) {
subtract(*img1, *img2, *diff);
}
int main(int argc, const char** argv) {
LARGE_INTEGER tFreq, tStart, tEnd;
cudaEvent_t start, stop;
float tms, ms;
Mat img1, img2;
Mat diff, diff2;
img1 = imread("sky1.jpg", CV_LOAD_IMAGE_UNCHANGED);
img2 = imread("sky2.jpg", CV_LOAD_IMAGE_UNCHANGED);
resize(img1, img1, Size(640, 480));
resize(img2, img2, Size(640, 480));
Mat *dev_img1, *dev_img2, *dev_diff;
cudaMalloc((void**)dev_img1, sizeof(dev_img1));
cudaMalloc((void**)dev_img1, sizeof(dev_img1));
cudaMemcpy(&img1, dev_img1, sizeof(Mat), cudaMemcpyHostToDevice);
cudaMemcpy(&img2, dev_img2, sizeof(Mat), cudaMemcpyHostToDevice);
//////////////////////////////////
QueryPerformanceFrequency(&tFreq); // Frequency set
QueryPerformanceCounter(&tStart); // Time count Start
//////////////////////////////////
subtract(img1, img2, diff); // sequences.
//////////////////////////////////
QueryPerformanceCounter(&tEnd); // TIme count End
tms = ((tEnd.QuadPart - tStart.QuadPart) / (float)tFreq.QuadPart) * 1000;
cout << fixed;
cout.precision(50);
cout << "CPU Time=" << tms << endl;
imshow("CPU_image1", img1);
moveWindow("CPU_image1", 250, 250);
imshow("CPU_image2", img2);
moveWindow("CPU_image2", 250, 250);
imshow("CPU_result", diff);
moveWindow("CPU_result", 250, 250);
//////////////////////////////////
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
//////////////////////////////////
imageSubtract <<<1, 1 >> > (dev_img1, dev_img2, dev_diff);
//////////////////////////////////
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&ms, start, stop);
cudaMemcpy(&diff2, dev_diff, sizeof(Mat), cudaMemcpyDeviceToHost);
imshow("GPU_image1", img1);
moveWindow("CPU_image1", 250, 250);
imshow("GPU_image2", img2);
moveWindow("CPU_image2", 250, 250);
imshow("GPU_result", diff2);
moveWindow("GPU_result", 250, 250);
cout << fixed;
cout.precision(50);
cout << "GPU Time=" << ms << endl;
waitKey(0);
destroyAllWindows();
cudaFree(dev_img1);
cudaFree(dev_img2);
cudaFree(dev_diff);
return 0;
}
Following messages are debugging from viusal studio 2017.
Error calling a __host__ function("cv::subtract") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::subtract" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::debug_build_guard::_InputArray::_InputArray") from a __global__ function("imageSubtract") is not allowed cudatest
C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::debug_build_guard::_InputArray::_InputArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::debug_build_guard::_InputArray::_InputArray") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::debug_build_guard::_InputArray::_InputArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::debug_build_guard::_OutputArray::_OutputArray") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::debug_build_guard::_OutputArray::_OutputArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::noArray") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::noArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::debug_build_guard::_InputArray::~_InputArray") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::debug_build_guard::_InputArray::~_InputArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error calling a __host__ function("cv::debug_build_guard::_InputArray::~_InputArray") from a __global__ function("imageSubtract") is not allowed cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Error identifier "cv::debug_build_guard::_InputArray::~_InputArray" is undefined in device code cudatest C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu 14
Warning calling a __host__ function("cv::debug_build_guard::_InputArray::~_InputArray") from a __host__ __device__ function("cv::debug_build_guard::_InputArray::~_InputArray [subobject]") is not allowed cudatest c:\program files\nvidia gpu computing toolkit\cuda\v10.0\include\vector_types.h 420
Error MSB3721 ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\bin\nvcc.exe" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\bin\HostX86\x64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DWIN32 -DWIN64 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Fdx64\Debug\vc141.pdb /FS /Zi /RTC1 /MDd " -o x64\Debug\image_subtraction.cu.obj "C:\Users\SeungJinLee\source\repos\cudatest\cudatest\image_subtraction.cu"" Order is finished.(code: 1). cudatest C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\VC\VCTargets\BuildCustomizations\CUDA 10.0.targets 712
I want to show the GPU kernel function is much faster than basic CPU process.
I am not sure about what is wrong with this…
Please tell me a tip or error about my code?
ps. if you have a free time, please teach me how to use the cuda 10 in viusal studio 2017 with opencv 4.0.0. or please recommand the version of the software.