I When I run the following code lines, I have the result of [-19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20;…
void ndgridFunction(int height, int width, Mat *rs, Mat *cs){
for(int i = -(height-1); i<= height; i++){
for(int j= -(width - 1);j <= width; j++){
cs->at<float>(i+(height - 1 ),j+(width - 1)) = float(j);
rs->at<float>(i+(height - 1 ),j+(width - 1)) = float(i);
}
}
}
int main()
{
int height = 15;
int width = 20;
Mat cs = Mat(height*2, width*2, CV_32F);
Mat rs = Mat(height*2, width*2, CV_32F);
ndgridFunction(height,width, &rs, &cs);
cout<<cs<<endl;
}
}
I convert it to Cuda codes in the following but I reach a different result. Here is my Cuda kernel and result.
__global__ void ndgridFunctionDev ( float *rs, float *cs,int height ,int width){
int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
if((xIndex<=width) && (yIndex<height)){
//const int tid = yIndex * width + xIndex;
for(int i = -(height-1); i<= height; i++){
for(int j= -(width - 1);j <= width; j++){
cs[(xIndex+i-1)*width+(yIndex+j-1)] = float(j);
rs[(xIndex+i-1)*width+(yIndex+j-1)] = float(i);
}
}
}
}
void equalImagesFunc(cv::Mat& input,cv::Mat& gray, int height, int width){
const int inputBytes = input.step * input.rows;
const int grayBytes = gray.step * gray.rows;
float *d_input, *d_gray;
cudaMalloc((void**)&d_input,inputBytes);
cudaMalloc((void**)&d_gray,grayBytes);
const dim3 block(16,16);
const dim3 grid(8,8);
//cout << "row = " << input.rows << "cols= " << input.cols << endl;
ndgridFunctionDev<<<grid,block>>>(d_input,d_gray,width,height);
cudaDeviceSynchronize();
cudaMemcpy(input.ptr(),d_input,inputBytes,cudaMemcpyDeviceToHost);
cudaMemcpy(gray.ptr(),d_gray,grayBytes,cudaMemcpyDeviceToHost);
cudaFree(d_input);
cudaFree(d_gray);
}
int main(void){
int height = 15;
int width = 20;
Mat cs = Mat(height*2, width*2, CV_32F);
Mat rs = Mat(height*2, width*2, CV_32F);
equalImagesFunc(cs,rs,height,width);
cout<<cs<<endl;
}
And result is :[1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4; 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7;…
How can I implement it to give me the same result ?