Hi!
I’m trying to undistort and filter an image, loading it as a texture, but I can’t find the way of filtering it using only 1byte words.
If I load the texture as float values, there’s no problem, it works perfectly.
(texture<float, 2, cudaReadModeElementType>tex, etc)
The problem is when I try to load the texture as unsigned char values, as I explain in the code below.
I think that all I’ve got to do is define texture as cudaReadModeNormalizedFloat, and enable filter mode, but the result I get is a empty, completely black image.
I’ve simplyfied the code to work with a BW image, and I’ve wrote all that is related with texture in blue color, in order to do it easier to read.
Can someone help me?
I’m working on a Quadro FX 3700, and Fedora 9.
Thanks!!
#include <cuda.h>
#include <libs/camera.h>
constant int PITCH;
device float* img_global;
texture <unsigned char, 2, cudaReadModeNormalizedFloat> tex;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
device float2 distort_my_point(int px, int py, Tcamera cam )
{
//Adding distortion
float a1, a2, a3, delta_x, delta_y, dx,dy;
float r2 = (px-cam.K20)(px-cam.K20)/(cam.K00cam.K00) + (py-cam.K21)(py-cam.K21)/(cam.K11cam.K11);
float r4 = r2 * r2;
a1 = 2. * (px-cam.K20) * (py-cam.K21)/ (cam.K00*cam.K11);
a2 = r2 + 2. * (px-cam.K20) * (px-cam.K20)/ (cam.K00*cam.K00);
a3 = r2 + 2. * (py-cam.K21) * (py-cam.K21)/ (cam.K11*cam.K11);
delta_x = cam.dis2 * a1 + cam.dis3 * a2;
delta_y = cam.dis2 * a3 + cam.dis3 * a1;
delta_x = delta_x * cam.K00;
delta_y = delta_y * cam.K11;
dx = px + (px-cam.K20)(cam.dis0r2 + cam.dis1*r4) + delta_x;
dy = py + (py-cam.K21)*(cam.dis0*r2 + cam.dis1*r4) + delta_y;
return make_float2(dx, dy);
}
global void undistort_image_kernel(float *out, int2 size, Tcamera cam)
{
int2 thread = make_int2(blockDim.x * blockIdx.x + threadIdx.x, blockDim.y * blockIdx.y + threadIdx.y);
if(thread.x < size.x && thread.y < size.y)
{
int index = thread.y * PITCH + thread.x;
float2 distorted_point = distort_my_point(thread.x, thread.y, cam);
out[index] = tex2D(tex, distorted_point.x, distorted_point.y);
}
}
host void undistort_image(bool mean, int im_size, int2 size, dim3 dimBlock, dim3 dimGrid, Tcamera cam, unsigned char *image)
{
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = false;
tex.channelDesc= cudaCreateChannelDesc(32,0,0,0,cudaChannelFormatKindFloat); //I've seen that piece of code in the nvidia forums, but it does not work.
cudaArray* array;
cudaMallocArray(&array, &channelDesc, size.x, size.y );
cudaMemcpyToArray(array, 0, 0, image, im_size*sizeof(unsigned char), cudaMemcpyHostToDevice);
[b] //until that point, everything is ok, i can put the cudaArray into a float array and save the images,
//and they are correctly undistorted.[/b]
cudaBindTextureToArray(tex, array, channelDesc)
;
undistort_image_kernel <<<dimGrid, dimBlock>>> (img_global, size, cam);
checkCUDAError("undistort img kernel");
cudaThreadSynchronize();
cudaUnbindTexture(tex);
cudaFreeArray(array);
////In order to get the result images
size_t pitch;
cudaMemcpyFromSymbol(&pitch, PITCH, sizeof(int), 0, cudaMemcpyDeviceToHost);
pitch = (int) pitch * sizeof(float);
float * host = (float*)malloc(im_size*sizeof(float));
cudaMemcpy2D(host, size.x*sizeof(float), img_global, pitch, size.x*sizeof(float), size.y, cudaMemcpyDeviceToHost);
checkCUDAError("memcpy2D");
for(int i=0; i<im_size; i++)
{
img_in[i] = (unsigned char)host[i];
}
free(host);
}
extern “C” void memory_allocation_p(int im_width, int im_height, Tcamera cam)
{
int2 size = make_int2(im_width, im_height);
//block and grid dimensions
dim3 dimBlock(THREADS_X, THREADS_Y);
int blocks_x = im_width / THREADS_X;
int blocks_y = im_height / THREADS_Y;
if(im_width%THREADS_X !=0) blocks_x++;
if(im_height%THREADS_Y !=0) blocks_y++;
dim3 dimGrid(blocks_x, blocks_y);
//memory allocation for global vars
size_t pitch_dependant;
cudaMallocPitch((void**)&img_global, &pitch_dependant, im_width*sizeof(float), im_height);
int img_pitch = pitch_dependant / sizeof(float);
cudaMemcpyToSymbol(PITCH, &img_pitch, sizeof(int), 0, cudaMemcpyHostToDevice);
}
extern “C” void call_undistort(unsigned char *image ,int im_width, int im_height, Tcamera cam)
{
//basic declarations
int2 size = make_int2(im_width, im_height);
unsigned int im_size = im_width * im_height;
//block and grid dimensions
dim3 dimBlock(THREADS_X, THREADS_Y);
int blocks_x = im_width / THREADS_X;
int blocks_y = im_height / THREADS_Y;
if(im_width%THREADS_X !=0) blocks_x++;
if(im_height%THREADS_Y !=0) blocks_y++;
dim3 dimGrid(blocks_x, blocks_y);
undistort_image(im_width*im_height, size, dimBlock, dimGrid, cam, image);
}
extern “C” void memFree()
{
cudaFree(img_global);
}