CUDA texture memory alignment issue

The following code at the moment just copies from one pitch array to another, very simple. If the data (an image) is such that the pitch of the data is the same as the width then the following works just fine. However if the pitch isn’t equal then the copyMove function which uses texture functionality does not work (after a few repetitions the image is blurred and starts moving in the positive x, y direction). The normal copy function still works for images of all sizes. Using a non normalized texture also works. Is it just rounding errors that are stacking up after a few thousand frames?

[codebox]

global void copyMove(unsigned int *out, int pitch, int width, int height){

int x = blockIdx.x*blockDim.x + threadIdx.x;

int y = blockIdx.y*blockDim.y + threadIdx.y;

out[pitch*y + x] = rgbaFloatToInt(tex2D(tex, x / (float) width, y / (float) height));

}

global void copy(unsigned int *out, const unsigned int *in, int pitch)

{

int x = blockIdx.x*blockDim.x + threadIdx.x;

int y = blockIdx.y*blockDim.y + threadIdx.y;

out[pitch*y + x] = in[pitch*y + x];

}

extern “C” void moveImage(unsigned int *d_src, unsigned int *d_dest,

						  int width, int height, 

						  cudaChannelFormatDesc *channelDesc, size_t d_pitchBytes,

						  int dx, int dy){

tex.addressMode[0] = cudaAddressModeClamp;

tex.addressMode[1] = cudaAddressModeClamp;

tex.filterMode = cudaFilterModeLinear;

tex.normalized = true;

cutilSafeCall(cudaBindTexture2D(0, &tex, d_src, channelDesc, width, height, d_pitchBytes));

dim3 dimBlock(16, 16); 

dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);

int pitch = d_pitchBytes / sizeof(unsigned int);

copyMove<<< dimGrid, dimBlock >>>(d_dest, pitch, width, height);

//copy<<< dimGrid, dimBlock >>>(d_dest, d_src, pitch);

cutilSafeCall(cudaUnbindTexture(&tex));

}

[/codebox]