Hello everyone,
I am implementing the Anisotropic Diffusion algorithm in OpenCL. I am porting the code from CUDA to OpenCL. Being completely new to OpenCL, I am having trouble accessing the pixels in my image.
My .cpp code snippet is given below.
[codebox]while (iter != 0) {
    /*
     * One outer pass: upload the five host images to their device buffers,
     * run Kernel1 followed by Kernel2 for a fixed number of inner
     * iterations, then read the processed image back into img_proc_PDE.
     */
    //printf("iteration number %d \n",iter);

    // Size in bytes of one image buffer (one float per pixel).
    const size_t img_bytes = sizeof(float) * width * height;

    // Load images into the OpenCL buffers (CL_TRUE => blocking writes).
    ErrNum = clEnqueueWriteBuffer(commandQueue, d_img_noised, CL_TRUE, 0, img_bytes, img_noised, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);
    ErrNum = clEnqueueWriteBuffer(commandQueue, d_img_proc_med, CL_TRUE, 0, img_bytes, img_proc_med, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);
    ErrNum = clEnqueueWriteBuffer(commandQueue, d_img_proc_PDE, CL_TRUE, 0, img_bytes, img_proc_PDE, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);
    ErrNum = clEnqueueWriteBuffer(commandQueue, d_img_sgn, CL_TRUE, 0, img_bytes, img_sgn, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);
    ErrNum = clEnqueueWriteBuffer(commandQueue, d_img_temp, CL_TRUE, 0, img_bytes, img_temp, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);

    int counter = 30; // number of inner iterations
    //const int threshold=25;
    while (counter != 0) {
        // MedianFilter_Sgn_Kernel: set kernel arguments.
        // NOTE(review): threshold, width and height are passed with
        // sizeof(size_t). OpenCL C does not allow size_t kernel parameters,
        // so if the kernel declares them as int/uint the size passed here
        // must be sizeof(cl_int)/sizeof(cl_uint) and the host variables must
        // have that type -- confirm against the kernel signature.
        //printf("the count is counter %d", counter);
        cl_uint i = 0;
        ErrNum = clSetKernelArg(Kernel1, i++, sizeof(cl_mem), (void*)&d_img_proc_PDE);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel1, i++, sizeof(cl_mem), (void*)&d_img_sgn);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel1, i++, sizeof(size_t), &threshold);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel1, i++, sizeof(size_t), &width);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel1, i++, sizeof(size_t), &height);
        shrCheckError(ErrNum, CL_SUCCESS);
        //printf("the width is %d height is %d threshold is %d",width,height,threshold);
        //cl_kernel Kernel[2];
        //size_t global_work_size= BLOCKSIZE
        // NOTE(review): work_dim is 2, so the global size argument must
        // point to two size_t values (one per dimension). Verify that
        // global_work_size is declared that way and is not a single scalar.
        ErrNum = clEnqueueNDRangeKernel(commandQueue, Kernel1, 2, NULL, &global_work_size, NULL, 0, NULL, NULL);
        /*if(ErrNum == CL_SUCCESS){
        printf("kernel 1 is executed!!\n");}
        */
        shrCheckError(ErrNum, CL_SUCCESS);

        // FilterPDE_Sgn_Kernel: set kernel arguments.
        cl_uint j = 0;
        ErrNum = clSetKernelArg(Kernel2, j++, sizeof(cl_mem), (void*)&d_img_proc_PDE);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel2, j++, sizeof(cl_mem), (void*)&d_img_sgn);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel2, j++, sizeof(cl_mem), (void*)&d_img_temp);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel2, j++, sizeof(size_t), &width);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clSetKernelArg(Kernel2, j++, sizeof(size_t), &height);
        shrCheckError(ErrNum, CL_SUCCESS);
        ErrNum = clEnqueueNDRangeKernel(commandQueue, Kernel2, 2, NULL, &global_work_size, NULL, 0, NULL, NULL);
        /*if(ErrNum == CL_SUCCESS){
        printf("kernel 2 is executed!!\n");}
        */
        shrCheckError(ErrNum, CL_SUCCESS);

        // Wait for both kernels to complete; check the result as well so a
        // failure reported at completion time is not silently ignored.
        ErrNum = clFinish(commandQueue);
        shrCheckError(ErrNum, CL_SUCCESS);

        // PSNR
        // NOTE(review): the read-back below is commented out, so the host
        // copy img_proc_PDE is not refreshed inside this inner loop; PSNR is
        // computed on whatever the host buffer held before the kernels ran.
        PSNR(img_original, img_proc_PDE, &psnr, width, height);
        //printf("the PSNRfor iteration number %d is %f\n",counter, psnr);
        //ErrNum = clEnqueueReadBuffer(commandQueue,d_img_proc_PDE,CL_TRUE,0,sizeof(float)*width*height,(void*)img_proc_PDE,0,NULL,NULL);
        counter--;
    }

    // Blocking read of the processed image back to the host.
    ErrNum = clEnqueueReadBuffer(commandQueue, d_img_proc_PDE, CL_TRUE, 0, img_bytes, img_proc_PDE, 0, NULL, NULL);
    shrCheckError(ErrNum, CL_SUCCESS);
    iter--;
}
[/codebox]
Given below is the kernel code snippet where the threads are supposed to access the pixels, or are not accessing them :(
[codebox]/*if(ErrNum == CL_SUCCESS){
printf(“kernel 2 is executed!!\n”);}
*/shrCheckError(ErrNum, CL_SUCCESS);
//put a barrier
clFinish(commandQueue);
//PSNR
PSNR(img_original,img_proc_PDE,&psnr,width,height);
//printf(“the PSNRfor iteration number %d is %f\n”,counter, psnr);
//ErrNum = clEnqueueReadBuffer(commandQueue,d_img_proc_PDE,CL_TRUE,0,si
zeof(float)widthheight,(void*)img_proc_PDE,0,NULL,NULL);
counter–;
}
ErrNum = clEnqueueReadBuffer(commandQueue,d_img_proc_PDE,CL_TRUE,0,si
zeof(float)widthheight,img_proc_PDE,0,NULL,NULL);
iter–;
}
.
.
.
.
Rest of the code for computation.[/codebox]
I am accessing the noised image multiple times, but for some reason the threads are not accessing the pixels. As a result, my output image is the same as the noised input image. Can you please tell me why this is happening? I am pretty sure there is nothing wrong with the logic of the implementation, as it works just fine in the CUDA implementation.