i am try copy imageData (a OpenCV IPLimage which is formatted to unsigned 8bit characters) to cuda array ,and bind to texture memory .Then handle it in the kernel .but the result was wrong :( i have deal with it the whole day,kernel is simple code
__global__ void undistort(float* dstdata, int width, int height)
{
unsigned int u = threadIdx.x+blockIdx.x*blockDim.x;
unsigned int v = threadIdx.y+blockIdx.y*blockDim.y;
int u_coor,v_coor;
float x,y;
float xtmp,ytmp;
float r;
x = (u-u0)*pixel;
y = (v-v0)*pixel;
r = sqrt(pow(x-X0,2)+pow(y-Y0,2));
xtmp = x-X0
+dis[0]*pow(r,2)*(x-X0)
+dis[1]*pow(r,4)*(x-X0)
//+k3*pow(r,6)*(x-X0)
+dis[2]*(pow(r,2)+2*pow(x-X0,2))
+2*dis[3]*(x-X0)*(y-Y0);
//+b1*(x-X0)
//+b2*(y-Y0);
ytmp = y-Y0
+dis[0]*pow(r,2)*(y-Y0)
+dis[1]*pow(r,4)*(y-Y0)
//+k3*pow(r,6)*(y-Y0)
+dis[3]*(pow(r,2)+2*pow(y-Y0,2))
+2*dis[2]*(x-X0)*(y-Y0);
u_coor = xtmp/pixel+u0;
v_coor = ytmp/pixel+v0;
//get texture data
dstdata[v * width + u] = tex2D(texsrc, u_coor, v_coor);
}
the host code is
#define DISTORTION_NUM 4
__constant__ float dis[DISTORTION_NUM];
__constant__ float pixel = 0.006400;
__constant__ float X0 = -0.024100;
__constant__ float Y0 = 0.058500;
__constant__ float f = 24.405500;
__constant__ int u0 = 5616/2;
__constant__ int v0 = 3744/2;
texture<uchar, 2, cudaReadModeElementType> texsrc;
int main (int argc, char** argv)
{
IplImage *src_img,*dst_img;
if((src_img = cvLoadImage(argv[1],-1)) != 0)
{
int src_w = src_img->width;
int src_h = src_img->height;
int src_size = src_h*src_w;
dst_img = cvCloneImage(src_img);
dst_img->origin = src_img->origin;
cvZero(dst_img);
int dst_h = dst_img->height;
int dst_w = dst_img->width;
int dst_size = dst_h*dst_w;
float *tmp_dis = (float* )malloc(sizeof(float)*DISTORTION_NUM);
tmp_dis[0] = -1.724000e-004; //k1
tmp_dis[1] = 2.642000e-007; //k2
tmp_dis[2] = -3.814000e-006; //p1
tmp_dis[3] = 7.154000e-006; //p2
cutilSafeCall(cudaMemcpyToSymbol(dis, tmp_dis, sizeof(float)*DISTORTION_NUM));
free(tmp_dis);
cudaEvent_t start, stop;
cutilSafeCall(cudaEventCreate(&start));
cutilSafeCall(cudaEventCreate(&stop));
cutilSafeCall(cudaEventRecord(start,0));
cudaChannelFormatDesc channelDes = cudaCreateChannelDesc<float>();
cudaArray* srcArray;
cudaMallocArray(&srcArray, &channelDes, src_w, src_h);
cudaMemcpy2DToArray(srcArray, 0, 0, src_img->imageData, sizeof(unsigned char) * src_img->widthStep,
sizeof(unsigned char) * src_img->width, src_img->height, cudaMemcpyHostToDevice);
texsrc.addressMode[0] = cudaAddressModeWrap;
texsrc.addressMode[1] = cudaAddressModeWrap;
texsrc.filterMode = cudaFilterModeLinear;
texsrc.normalized = 1;
cudaBindTextureToArray(texsrc, srcArray, channelDes);
float* dst_data;
cudaMalloc((void**)&dst_data, dst_size*sizeof(float));
dim3 dimBlock(16, 16, 1);
dim3 dimGrid((src_w+dimBlock.x-1)/dimBlock.x, (src_h+dimBlock.y-1)/dimBlock.y);
undistort<<<dimGrid, dimBlock, 0>>>(dst_data, src_w, src_h);
cutilCheckMsg("undistort failed ");
cudaThreadSynchronize();
cutilSafeCall(cudaEventRecord(stop,0));
cutilSafeCall(cudaEventSynchronize(stop));
float elapsedtime;
cutilSafeCall(cudaEventElapsedTime(&elapsedtime,start,stop));
printf("Time to undistort: %3.1f ms\n",elapsedtime);
cutilSafeCall(cudaEventDestroy(start));
cutilSafeCall(cudaEventDestroy(stop));
cutilSafeCall(cudaMemcpy(dst_img->imageData,dst_data,dst_size,cudaMemcpyDeviceToHost));
cvNamedWindow("Undistort",0);
cvShowImage("Undistort",dst_img);
cvSaveImage(argv[2],dst_img,0);
cvWaitKey(0);
cudaUnbindTexture(texsrc);
cudaFree(dst_data);
cudaFreeArray(srcArray);
cvReleaseImage(&src_img);
cvReleaseImage(&dst_img);
}
return 0 ;
}
I want the filter mode be Linear,but if cudaCreateChannelDesc() ,i get the error message
,but the imageData is unsigned 8bit characters (uchar *),i want know what is wrong with my code ,why
undistort-cuda.exe input,jpg result.jpg
the reslut is strange .need your help ,here is the code
undistort-cuda.cu (4.03 KB)
,best regard
7rack