Texture addressMode: cudaAddressModeWrap

Hi,

I’m currently trying to understand how cudaAddressModeWrap works for 2D textures.
I wrote the following code to test things out:

#include <stdio.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Prints a CUDA runtime error with its source location to stderr and,
// unless abort is false, terminates the process using the error code
// as the exit status. No-op when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code == cudaSuccess)
      return;
   fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
   if (abort)
      exit(code);
}
  
// Legacy 2D texture reference (bound in main); one float per texel,
// element-type reads (no conversion/filtering of the returned value).
texture<float, 2, cudaReadModeElementType> texRef;
// Fetches and prints a single texel at (col, row).
// NOTE(review): tex2D takes float coordinates; with unnormalized
// coordinates and point filtering, texel i covers [i, i+1), so integer
// (col, row) addresses texel (col, row) directly. Per the CUDA
// Programming Guide, cudaAddressModeWrap/Mirror only take effect with
// normalized coordinates — with unnormalized coordinates out-of-range
// coordinates are clamped, which is why col=4 reads the last texel.
__global__ 
void testTexture(int col, int row) {
	printf("%f\n", tex2D(texRef, col, row));
}
 
int main()
{
	// 5 rows x 4 columns, row-major; value = row*4 + col + 1.
	float data[5][4] = 
	{
		{1,2,3,4},
		{5,6,7,8},
		{9,10,11,12},
		{13,14,15,16},
		{17,18,19,20}
	};

	// One 32-bit float channel per texel.
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
	cudaArray *cuArray;
	// Width = 4 texels, height = 5 rows.
	gpuErrchk(cudaMallocArray(&cuArray, &channelDesc, 4, 5));
	// cudaMemcpy2DToArray replaces the deprecated cudaMemcpyToArray.
	// Source pitch and copied row width are both 4 floats, in BYTES.
	gpuErrchk(cudaMemcpy2DToArray(cuArray, 0, 0, data, 4 * sizeof(float),
	                              4 * sizeof(float), 5, cudaMemcpyHostToDevice));

	// Set texture parameters.
	// NOTE: cudaAddressModeWrap (and Mirror) only apply when
	// texRef.normalized == true. With unnormalized coordinates, as used
	// here, the hardware clamps out-of-range coordinates instead — which
	// is why fetching column 4 returns the last texel of the row (4.0)
	// rather than wrapping around to 1.0. To observe wrapping, enable
	// normalized coordinates and sample at (col + 0.5f) / width.
	texRef.addressMode[0] = cudaAddressModeWrap;
	texRef.addressMode[1] = cudaAddressModeWrap;
	//texRef.filterMode = cudaFilterModeLinear;
	//texRef.normalized = true;    // access with normalized texture coordinates

	// Bind the array to the texture
	gpuErrchk(cudaBindTextureToArray(texRef, cuArray, channelDesc));

	testTexture<<<1, 1>>>(4, 0);
	gpuErrchk(cudaGetLastError());      // catch launch-configuration errors
	gpuErrchk(cudaDeviceSynchronize()); // wait for kernel; flushes device printf
	gpuErrchk(cudaFreeArray(cuArray));
	return EXIT_SUCCESS;
}

testTexture<<<1, 1>>>(0, 0) prints, as expected, “1.000000”.
testTexture<<<1, 1>>>(1, 0) prints, as expected, “2.000000”.
testTexture<<<1, 1>>>(2, 0) prints, as expected, “3.000000”.
testTexture<<<1, 1>>>(3, 0) prints, as expected, “4.000000”.

But:
testTexture<<<1, 1>>>(4, 0) prints “4.000000”.
Why? I would expect it to wrap around and print “1.000000” again. How does the wrap mode work?

Edit: I tried it now with a combination of cudaMallocPitch and cudaBindTexture2D. However, this gives me completely wrong values, even for indices inside the texture borders. What is wrong?

#include <stdio.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Prints a CUDA runtime error with its source location to stderr and,
// unless abort is false, terminates the process using the error code
// as the exit status. No-op when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code == cudaSuccess)
      return;
   fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
   if (abort)
      exit(code);
}
  
// Legacy 2D texture reference (bound in main); one float per texel,
// element-type reads (no conversion/filtering of the returned value).
texture<float, 2, cudaReadModeElementType> texRef;
// Fetches and prints a single texel at (col, row).
// NOTE(review): tex2D takes float coordinates; with unnormalized
// coordinates and point filtering, texel i covers [i, i+1), so integer
// (col, row) addresses texel (col, row) directly. Per the CUDA
// Programming Guide, cudaAddressModeWrap/Mirror only take effect with
// normalized coordinates — with unnormalized coordinates out-of-range
// coordinates are clamped instead of wrapping.
__global__ 
void testTexture(int col, int row) {
	printf("%f\n", tex2D(texRef, col, row));
}
 
int main()
{
	// 5 rows x 4 columns, row-major; value = row*4 + col + 1.
	float data[5][4] = 
	{
		{1,2,3,4},
		{5,6,7,8},
		{9,10,11,12},
		{13,14,15,16},
		{17,18,19,20}
	};

	size_t pitch;

	float * gpu_data;
	// Row width requested in bytes; cudaMallocPitch returns the actual
	// (aligned) pitch, which must be used for all subsequent accesses.
	gpuErrchk(cudaMallocPitch((void**) &gpu_data, &pitch, 4 * sizeof(float), 5));
	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
	// BUG FIX: the width argument of cudaMemcpy2D is in BYTES, not
	// elements. The original code passed 4, copying only the first float
	// of each row and leaving the remaining three texels of every device
	// row uninitialized — the source of the "completely wrong values".
	gpuErrchk(cudaMemcpy2D(gpu_data, pitch, data, 4 * sizeof(float),
	                       4 * sizeof(float), 5, cudaMemcpyHostToDevice));

	// Set texture parameters.
	// NOTE: cudaAddressModeWrap only applies with normalized coordinates
	// (texRef.normalized = true); with unnormalized coordinates
	// out-of-range coordinates are clamped instead.
	texRef.addressMode[0] = cudaAddressModeWrap;
	texRef.addressMode[1] = cudaAddressModeWrap;
	//texRef.filterMode = cudaFilterModeLinear;
	//texRef.normalized = true;    // access with normalized texture coordinates

	// Width/height here are in texels (4 x 5); pitch is in bytes.
	gpuErrchk(cudaBindTexture2D(0, &texRef, gpu_data, &channelDesc, 4, 5, pitch));

	testTexture<<<1, 1>>>(2, 0);
	gpuErrchk(cudaGetLastError());      // catch launch-configuration errors
	gpuErrchk(cudaDeviceSynchronize()); // wait for kernel; flushes device printf
	gpuErrchk(cudaFree(gpu_data));
	return EXIT_SUCCESS;
}