I had some trouble when I tried to rewrite this program as CUDA.
#include
#include “opencv2/opencv.hpp”
#include
using namespace std;
using namespace cv;int main(int argc, char** argv)
{
Mat src;
src = imread(“…/3.png”, 0);namedWindow("InputImages", WINDOW_NORMAL); imshow("InputImages", src); int src_height = src.rows; int src_width = src.cols; Size dstSize = Size(2 * src_height, 2 * src_height); Mat dst = Mat::zeros(dstSize, CV_8UC1); double scale_r = 2 * src_height / (dstSize.width); double scale_theta = src_width / CV_2PI; for (int i = 0; i < dstSize.height; ++i) { for (int j = 0; j < dstSize.width; ++j) { Point2d center(dstSize.width / 2, dstSize.width / 2); double distance = sqrt(pow(i - center.y, 2) + pow(j - center.x, 2)); if (distance < dstSize.width / 2) { double Rec_Y = distance; if (Rec_Y < 0) { Rec_Y = 0; } if (Rec_Y > dstSize.width / 2) { Rec_Y = dstSize.width / 2; } double line_theta = atan2(i - center.y, j - center.x); if (line_theta < 0) { line_theta += CV_2PI; } if (line_theta < 0) { cout << "still<0" << endl; } double Rec_X = line_theta * scale_theta; dst.at<uchar>(i, j) = src.at<uchar>((int)Rec_Y, (int)Rec_X); } } } namedWindow("OutputImages", WINDOW_NORMAL); imshow("OutputImages",dst); imwrite("C:/Users/Liuru/Desktop/result.jpg", dst); printf("%d", dstSize.width); waitKey(); return 0;
}
The following is the nth version of CUDA program I have rewritten:
#include
#include"opencv2/opencv.hpp"#include<cuda_runtime.h>
#include<device_launch_parameters.h>
using namespace std;
using namespace cv;global void polartrans(const uchar3* d_in, uchar3* d_out, int dstheight, int dstwidth, Point2d center,int width) {
double scale_theta = width / CV_2PI; for (int i = blockDim.y * blockIdx.y + threadIdx.y; i < dstheight; i += gridDim.y * blockDim.y) for (int j = blockDim.x * blockIdx.x + threadIdx.x; j < dstwidth; j += gridDim.x * blockDim.x) { double distance = sqrt(((i - center.y) * (i - center.y)) + ((j - center.x) * (j - center.x))); //printf("distance:%f i = %d\n" ,distance,i); if (distance < dstwidth / 2) { double Rec_Y = distance; if (Rec_Y < 0) { Rec_Y = 0; } if (Rec_Y > dstwidth / 2) { Rec_Y = dstwidth / 2; } double line_theta = atan2(i - center.y, j - center.x); if (line_theta < 0) { line_theta += CV_2PI; } if (line_theta < 0) { //cout << "still<0" << endl; } double Rec_X = line_theta * scale_theta; //dst.at<uchar>(i, j) = src.at<uchar>((int)Rec_Y, (int)Rec_X); //d_out[i * dstwidth + j] = d_in[(int)Rec_Y * dstwidth + (int)Rec_X]; //d_out[i * dstwidth + j].x = d_in[(int)Rec_Y * dstwidth + (int)Rec_X].x; //d_out[i * dstwidth + j].y = d_in[(int)Rec_Y * dstwidth + (int)Rec_X].y; //d_out[i * dstwidth + j].z = d_in[(int)Rec_Y * dstwidth + (int)Rec_X].z; d_out[i * width + j].x = d_in[(int)Rec_Y * width + (int)Rec_X].x; d_out[i * width + j].y = d_in[(int)Rec_Y * width + (int)Rec_X].y; d_out[i * width + j].z = d_in[(int)Rec_Y * width + (int)Rec_X].z; } }
}
int main()
{
Mat src;
src = imread(“…/3.png”);namedWindow("InputImages", WINDOW_NORMAL); imshow("InputImages", src); int height = src.rows; int width = src.cols; size_t src_size = sizeof(uchar3) * height * width; Size dstSize = Size(2 * height, 2 * height); Mat dst = Mat::zeros(dstSize, CV_8UC1); int dstheight = dstSize.height; int dstwidth = dstSize.width; size_t dst_size = sizeof(uchar3) * height * width*4; uchar3* d_in = NULL; uchar3* d_out = NULL; uchar3* h_out = (uchar3*)dst.data+1; cudaMalloc((void**)&d_in, src_size); cudaMalloc((void**)&d_out, dst_size); cudaMemcpy(d_in, (uchar3*)src.data, src_size, cudaMemcpyHostToDevice); Point2d center(dstSize.width / 2, dstSize.width / 2); dim3 dimGrid(8, 8, 1); dim3 dimBlock(32, 32, 1); polartrans << <dimGrid, dimBlock >> > (d_in, d_out,dstheight,dstwidth, center,width); cudaMemcpy(h_out, d_out, dst_size, cudaMemcpyDeviceToHost); cv::imshow("polartocart", dst); cv::waitKey(); cudaFree(d_in); cudaFree(d_out); return 0;
}
I have made many attempts when the input and output array sizes are inconsistent. If you can solve this problem, I would be grateful.