An Error of Image Copy

I did a simply copy image work via GPU, but there are errors on the pixels near right and bottom edges.

I tested three case:

  1. Directly define 0;
    2 Directly define 255;
  2. Copy data from texture memory

The followings are codes:

main function:

using namespace std;
using namespace cimg_library;
//extern “C” void radon_gpu(float , float , int, int);
//void radon_gpu(float , float, int, int);
extern “C” void radon_test(float
, float
, int, int);

int main(int argc, char* argv)
{
CUT_DEVICE_INIT(argc, argv);

//testCUDA();

CImg<unsigned char> input_image("phantom.jpg"); //read image


int input_width = input_image.dimx();
int input_height = input_image.dimy(); // get the width and height of image

int rotated_size = sqrt (pow((double)input_width, 2) + pow((double)input_height, 2)); // size of rotated image

CImg<float> host_image(rotated_size,rotated_size);
host_image.fill(0); // initialize rotated image as value 0

int off_point_x = (int) floor ( (rotated_size - input_width) / 2 + 0.5 );
int off_point_y = (int) floor ( (rotated_size - input_height) / 2 + 0.5 );

for ( int x = 0; x < input_width; x++)
	for ( int y = 0; y < input_height; y ++)
		host_image (x + off_point_x, y + off_point_y) =(float) input_image(x,y);

CImg<float> output_image(rotated_size,rotated_size);
//output_image.fill(255);

CImg<unsigned char> show_image(rotated_size,rotated_size);
//show_image.fill(0);

radon_test(host_image.data, output_image.data, rotated_size, rotated_size);


for ( int x1 = 0; x1 < show_image.dimx(); ++ x1)
	for ( int y1 = 0; y1 < show_image.dimy(); ++ y1)
		show_image(x1, y1) = (unsigned char) output_image(x1,y1);
		//show_image(x1, y1) = (unsigned char) 255;
CImgDisplay main_disp(show_image);


main_disp.wait();





return 0;

}

CU files:

#include <stdio.h>
#include <cuda_runtime.h>
#include <cutil.h>
#include “gpuRadon.h”

texture<float, 2, cudaReadModeElementType> tex2DRef;

global void radon_testD(float*, int , int );

extern “C” void radon_test(float* host_input, float* host_output, int width, int height)
{

float *imageRadonDevice;

int sizeOfImage = width * height; //size of input image	

// int sizeOfOutput = height*THETA * N; // size of output image
//
// cudaError_t cudaErr = cudaSuccess;
// copy imageHost to texture memory, it is the orginal input image
cudaChannelFormatDesc cf = cudaCreateChannelDesc();
cudaArray *ImageDevice = 0;
CUDA_SAFE_CALL( cudaMallocArray(&ImageDevice, &cf, width, height) ); // allocate the array memory
CUDA_SAFE_CALL( cudaMemcpyToArray(ImageDevice, 0, 0, host_input, sizeOfImage * sizeof(float), cudaMemcpyHostToDevice) ); // copy the image from host to device
CUDA_SAFE_CALL( cudaBindTextureToArray(tex2DRef, ImageDevice) );
tex2DRef.filterMode = cudaFilterModePoint;
tex2DRef.addressMode[0] = cudaAddressModeClamp;
tex2DRef.addressMode[1] = cudaAddressModeClamp;

//allocate the memory on device    
//CUDA_SAFE_CALL( cudaMalloc( (void**) &imageRotatedDevice, sizeOfImage * sizeof(float) ) ); 
CUDA_SAFE_CALL( cudaMalloc( (void**) &imageRadonDevice, sizeOfImage * sizeof(float)) );	

// //Compute the execution configuration assuming
//
dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);

// //Launch the Radon function on device
radon_testD<<<dimGrid, dimBlock>>> (imageRadonDevice,width, height);

CUDA_SAFE_CALL( cudaMemcpy(host_output, imageRadonDevice, sizeOfImage * sizeof(float), cudaMemcpyDeviceToHost) );

CUDA_SAFE_CALL( cudaFree(imageRadonDevice) );

}

global void radon_testD(float* outImage, int width, int height)
{

int ix = blockIdx.x * BLOCK_SIZE + threadIdx.x;
int iy = blockIdx.y * BLOCK_SIZE + threadIdx.y; //base index
int idx = iy * width + ix;  


//outImage[idx] = 0.0f;  //set up the value of pixel as 0.0
//outImage[idx] = 255.0f;  //set up the value of pixel as 255
outImage[idx] = tex2D(tex2DRef, ix,iy);
__syncthreads();
 
return;

}

The results are attached
Doc1.doc (73.5 KB)