opencv imageData copy to cuda

7rack · May 19, 2012, 4:26pm

i am try copy imageData (a OpenCV IPLimage which is formatted to unsigned 8bit characters) to cuda array ,and bind to texture memory .Then handle it in the kernel .but the result was wrong :( i have deal with it the whole day,kernel is simple code

__global__ void undistort(float* dstdata, int width, int height)

{	

	

	unsigned int u = threadIdx.x+blockIdx.x*blockDim.x;

	unsigned int v = threadIdx.y+blockIdx.y*blockDim.y;

	  int u_coor,v_coor;

	  float x,y;

	  float xtmp,ytmp;

	  float r;

	 

		x = (u-u0)*pixel;

		y = (v-v0)*pixel;

		r = sqrt(pow(x-X0,2)+pow(y-Y0,2));

		

		xtmp = x-X0

			  +dis[0]*pow(r,2)*(x-X0)

			  +dis[1]*pow(r,4)*(x-X0)

			  //+k3*pow(r,6)*(x-X0)

			  +dis[2]*(pow(r,2)+2*pow(x-X0,2))

			  +2*dis[3]*(x-X0)*(y-Y0);

			  //+b1*(x-X0)

			  //+b2*(y-Y0);

		

		ytmp = y-Y0

			 +dis[0]*pow(r,2)*(y-Y0)

			 +dis[1]*pow(r,4)*(y-Y0)

			 //+k3*pow(r,6)*(y-Y0)

			 +dis[3]*(pow(r,2)+2*pow(y-Y0,2))

			 +2*dis[2]*(x-X0)*(y-Y0);

		u_coor = xtmp/pixel+u0;

		v_coor = ytmp/pixel+v0;

		

		//get texture data  

		dstdata[v * width + u] = tex2D(texsrc, u_coor, v_coor);

}

the host code is

#define DISTORTION_NUM 4

__constant__ float dis[DISTORTION_NUM];

__constant__ float pixel = 0.006400;

__constant__ float X0 = -0.024100;

__constant__ float Y0 = 0.058500;

__constant__ float f = 24.405500;

__constant__ int u0 = 5616/2;

__constant__ int v0 = 3744/2;

texture<uchar, 2, cudaReadModeElementType> texsrc;

int main (int argc, char** argv)

{

	IplImage *src_img,*dst_img;

	if((src_img = cvLoadImage(argv[1],-1)) != 0)

  {

	int src_w = src_img->width;

	int src_h = src_img->height;

	int src_size = src_h*src_w; 

	dst_img = cvCloneImage(src_img);

	dst_img->origin = src_img->origin;

	cvZero(dst_img);

	int dst_h = dst_img->height;

	int dst_w = dst_img->width;

	int dst_size = dst_h*dst_w;

	float *tmp_dis = (float* )malloc(sizeof(float)*DISTORTION_NUM);

	tmp_dis[0] = -1.724000e-004; //k1

	tmp_dis[1] = 2.642000e-007;  //k2

	tmp_dis[2] = -3.814000e-006; //p1

	tmp_dis[3] = 7.154000e-006;  //p2

	cutilSafeCall(cudaMemcpyToSymbol(dis, tmp_dis, sizeof(float)*DISTORTION_NUM));

	free(tmp_dis);

	

	cudaEvent_t start, stop;

	cutilSafeCall(cudaEventCreate(&start));

	cutilSafeCall(cudaEventCreate(&stop));

	cutilSafeCall(cudaEventRecord(start,0));

	cudaChannelFormatDesc channelDes = cudaCreateChannelDesc<float>();

	cudaArray* srcArray;

	cudaMallocArray(&srcArray, &channelDes, src_w, src_h);

	

	cudaMemcpy2DToArray(srcArray, 0, 0, src_img->imageData, sizeof(unsigned char) * src_img->widthStep, 

						sizeof(unsigned char) * src_img->width, src_img->height, cudaMemcpyHostToDevice);

	

	texsrc.addressMode[0] = cudaAddressModeWrap;

	texsrc.addressMode[1] = cudaAddressModeWrap;

	

	texsrc.filterMode = cudaFilterModeLinear;

	texsrc.normalized = 1;

	

	cudaBindTextureToArray(texsrc, srcArray, channelDes);

	

	float* dst_data;

	cudaMalloc((void**)&dst_data, dst_size*sizeof(float));

	dim3 dimBlock(16, 16, 1);

	dim3 dimGrid((src_w+dimBlock.x-1)/dimBlock.x, (src_h+dimBlock.y-1)/dimBlock.y);

	

	undistort<<<dimGrid, dimBlock, 0>>>(dst_data, src_w, src_h);

	cutilCheckMsg("undistort failed ");

	cudaThreadSynchronize();

	

	cutilSafeCall(cudaEventRecord(stop,0));

	cutilSafeCall(cudaEventSynchronize(stop));

	float elapsedtime;

	cutilSafeCall(cudaEventElapsedTime(&elapsedtime,start,stop));

	printf("Time to undistort:	%3.1f ms\n",elapsedtime);

	cutilSafeCall(cudaEventDestroy(start)); 

	cutilSafeCall(cudaEventDestroy(stop));

	cutilSafeCall(cudaMemcpy(dst_img->imageData,dst_data,dst_size,cudaMemcpyDeviceToHost));

	

	cvNamedWindow("Undistort",0);

	cvShowImage("Undistort",dst_img);

	cvSaveImage(argv[2],dst_img,0);

	cvWaitKey(0);

	cudaUnbindTexture(texsrc);

	cudaFree(dst_data);

	cudaFreeArray(srcArray);

	cvReleaseImage(&src_img);

	cvReleaseImage(&dst_img);

  }

	return 0 ;

}

I want the filter mode be Linear,but if cudaCreateChannelDesc() ,i get the error message

,but the imageData is unsigned 8bit characters (uchar *),i want know what is wrong with my code ,why

undistort-cuda.exe input,jpg result.jpg

the reslut is strange .need your help ,here is the code

undistort-cuda.cu (4.03 KB)

,best regard

7rack

apostglen46 · May 19, 2012, 7:37pm

have you tried without the texture?

7rack · May 20, 2012, 12:59am

yes,and failed again:(.the texture memory more simple , i think .but i dont know what’s wrong .do you think the imageData copy to array and the result copy to host not working correctly ?

Sarnath · May 21, 2012, 3:56am

Just a random thought. Interpolation works well when you give floating coordinates…

But you are giving integer coordinates.

I hope you are looking for hardware interpolation, aren’t you?

Be aware that hardware interpolation is done with less precision and will deviate from similar CPU code results

7rack · May 21, 2012, 9:34am

i think you are right,i have changed

float u_coor,v_coor

,do you mean i should do the Linear by myself ,should not looking for hardware interpolation

texsrc.filterMode = cudaFilterModeLinear;

.but i try the

texsrc.filterMode = cudaFilterModePoint

still wrong (you can use any jpg pic try ,but the result seems like the same ).I think the serious problem is the imageData handle in the wrong way ,here is the another topic same like my problem .

laughingrice · May 22, 2012, 11:58am

if memory serves your channel descriptor should be unsigned char and not float, as the channel descriptor describes the input, not the output.
Are you sure that your input image is gray scale (i.e that it has only one channel)?
As for filtering, CUDA always converts your indexes to float (can cause performance issues if you are not aware of that due to extra type conversion)
Also the interpolation is based on the center or the pixel rather then the edge, i.e you will get your correct pixel value for pixel (0,0) at texture location (0.5,0.5), not (0,0) as most people expect. Giving integral indexes gives the correct value for nearest neighbor interpolation, not linear interpolation
For linear interpolation you need float output, so you should set the last texture template parameter to cudaReadModeNormalizedFloat rather than cudaReadModeElementType (note that your output will be in the range of [0,1] rather than [0,255]
You are using normalized coordinates for the texture access, which means that texture coordinates are in the range of [0,1) X [0,1) rather than [0,n) x [0,m) (i.e you need to index the texture with floating point values in the range between 0 and 1, and not 0 to image width / height)

7rack · May 22, 2012, 2:51pm

if memory serves your channel descriptor should be unsigned char and not float, as the channel descriptor describes the input, not the output.

Are you sure that your input image is gray scale (i.e that it has only one channel)?

As for filtering, CUDA always converts your indexes to float (can cause performance issues if you are not aware of that due to extra type conversion)

Also the interpolation is based on the center or the pixel rather then the edge, i.e you will get your correct pixel value for pixel (0,0) at texture location (0.5,0.5), not (0,0) as most people expect. Giving integral indexes gives the correct value for nearest neighbor interpolation, not linear interpolation

For linear interpolation you need float output, so you should set the last texture template parameter to cudaReadModeNormalizedFloat rather than cudaReadModeElementType (note that your output will be in the range of [0,1] rather than [0,255]

You are using normalized coordinates for the texture access, which means that texture coordinates are in the range of [0,1) X [0,1) rather than [0,n) x [0,m) (i.e you need to index the texture with floating point values in the range between 0 and 1, and not 0 to image width / height)

thank you for your reply,you are the master External Image , the above code lots of basic errors,i can work fine with gray scale yestreday.now i can linear interpolation with the cudaFilterModeLinear or myself codes .now how can i work with 3 or 4channels image ,do i need to uchar4 instead ? i cannot handle it well about how to tex2D() r.g.b.a value .in the forum there a

why i always think tex2D(imgTex, x-1, y)is the nearest pixel value ,am i wrong ? External Image

Topic		Replies	Views
Using tex2D for unsigned short/char CUDA Programming and Performance	14	3621	November 15, 2017
Interleaved Texture Fetch How? CUDA Programming and Performance	4	2625	February 1, 2008
Linear interpolation with textures using unsigned data CUDA Programming and Performance	6	1363	February 20, 2020
Linear interpolation with integer texture. CUDA Programming and Performance	6	2681	August 12, 2022
Bilinear texture interpolation of unsigned char array always returns 1.0 CUDA Programming and Performance	1	867	April 10, 2016
Repeated 1D interpolation with type promotion CUDA Programming and Performance	3	568	October 12, 2021
how to transfer image data into float in Cuda ? CUDA Programming and Performance	2	4481	January 23, 2008
resize with char texture Image returns skewed CUDA Programming and Performance	5	10446	June 13, 2013
Texture Linear Filter doesn't work with uchar! normalizedFloat does not work with uchar, why CUDA Programming and Performance	7	3000	December 16, 2009
cudaFilterModeLinear my fault or bug? CUDA Programming and Performance	4	3362	September 1, 2007

opencv imageData copy to cuda

Related topics