I’m trying to write a small program that goes over an image, and manipulates it by using iterations, means - each manipulation is dependent on the previous one.
As a start, I’m just trying to keep the original image after a few iterations, without manipulating it, just to make sure I can handle the image correctly.
I’m trying to base my code on the porvided simpleTexture, but I got a weird problem that I cannot manage to solve: the image is getting blurred and shifted. The more iterations I run, the worst it gets.
This is my simple kernel:
__global__ void
transformKernel( float* g_odata, int width, int height)
// calculate normalized texture coordinates
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
float u = x / (float) width;
float v = y / (float) height;
// read from texture and write to global memory
g_odata[y*width + x] = (tex2D(tex,u,v));
And this is the code the calls the kernel:
runTest( int argc, char** argv)
// load image from disk
float* h_data = NULL;
unsigned int width, height;
char* image_path = cutFindFilePath(image_filename, argv[0]);
if (image_path == 0)
CUT_SAFE_CALL( cutLoadPGMf(image_path, &h_data, &width, &height));
unsigned int size = width * height * sizeof(float);
printf("Loaded '%s', %d x %d pixels\n", image_filename, width, height);
// allocate device memory for result
float* d_data = NULL;
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_data, size));
// allocate array and copy image data
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
cudaArray* cu_array;
CUDA_SAFE_CALL( cudaMallocArray( &cu_array, &channelDesc, width, height ));
CUDA_SAFE_CALL( cudaMemcpyToArray( cu_array, 0, 0, h_data, size, cudaMemcpyHostToDevice));
// set texture parameters
tex.addressMode[0] = cudaAddressModeWrap;
tex.addressMode[1] = cudaAddressModeWrap;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true; // access with normalized texture coordinates
// Bind the array to the texture
CUDA_SAFE_CALL( cudaBindTextureToArray( tex, cu_array, channelDesc));
dim3 dimBlock(8, 8, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
for(int i=0;i<100;i++){
CUDA_SAFE_CALL( cudaThreadSynchronize() );
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
// execute the kernel
CUDA_SAFE_CALL( cudaThreadSynchronize() );
transformKernel<<< dimGrid, dimBlock, 0 >>>( d_data, width, height);
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUDA_SAFE_CALL( cudaMallocArray( &cu_array, &channelDesc, width, height ));
CUDA_SAFE_CALL(cudaMemcpyToArray(cu_array, 0, 0, d_data, size, cudaMemcpyDeviceToDevice));
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUDA_SAFE_CALL( cudaBindTextureToArray( tex, cu_array, channelDesc));
// check if kernel execution generated an error
CUT_CHECK_ERROR("Kernel execution failed");
CUDA_SAFE_CALL( cudaThreadSynchronize() );
CUT_SAFE_CALL( cutStopTimer( timer));
printf("Processing time: %f (ms)\n", cutGetTimerValue( timer));
printf("%.2f Mpixels/sec\n", (width*height / (cutGetTimerValue( timer) / 1000.0f)) / 1e6);
CUT_SAFE_CALL( cutDeleteTimer( timer));
// allocate mem for the result on host side
float* h_odata = (float*) malloc( size);
// copy result from device to host
CUDA_SAFE_CALL( cudaMemcpy( h_odata, d_data, size, cudaMemcpyDeviceToHost) );
// write result to file
char output_filename[1024];
strcpy(output_filename, image_path);
strcpy(output_filename + strlen(image_path) - 4, "_out.pgm");
CUT_SAFE_CALL( cutSavePGMf( output_filename, h_odata, width, height));
printf("Wrote '%s'\n", output_filename);
// write regression file if necessary
if( cutCheckCmdLineFlag( argc, (const char**) argv, "regression"))
// write file for regression test
CUT_SAFE_CALL( cutWriteFilef( "./data/regression.dat", h_odata, width*height, 0.0));
// cleanup memory
The code is based on the simpleTexture project, I only added the loop and the update of the texture on the device so changes of each iteration will have an affect.
I attached the blurred pgm file I get as a result of the above code.
If anyone has any clue on why do I get such weird results, or how to run over an image which is stored in a float texutre, I’ll be grateful, since I havn’t got any lead…