Issues with tex3D

I have a quadro M770 which I’m trying to run the following convolution kernel on:
global void Convolve3DEdgeExtractionTexture(cudaPitchedPtr d_FilterResponse, DataSize dataSizeVolume, DataSize dataSizeFilter, int blocksInY, float invBlocksInY, int dataWidthTimesDataHeight)
{
unsigned int blockIdxz = __float2uint_rd(blockIdx.y * invBlocksInY);
unsigned int blockIdxy = blockIdx.y - __umul24(blockIdxz,blocksInY);
volatile int x = __umul24(blockIdx.x,blockDim.x) + threadIdx.x;
volatile int y = __umul24(blockIdxy ,blockDim.y) + threadIdx.y;
volatile int z = __umul24(blockIdxz ,blockDim.z) + threadIdx.z;

if (x < dataSizeVolume.dataWidth && y < dataSizeVolume.dataHeight && z < dataSizeVolume.dataDepth) {
	int xOffset;
	int yOffset;
	int zOffset;
	
	float sum = 0.0f;
	

	zOffset = -(dataSizeFilter.dataDepth - 1)/2;
	for (int filterZ = dataSizeFilter.dataDepth - 1; filterZ >= 0; filterZ--) {
		yOffset = -(dataSizeFilter.dataHeight - 1)/2;
		for (int filterY = dataSizeFilter.dataHeight - 1; filterY >= 0; filterY--) {
			xOffset = -(dataSizeFilter.dataWidth - 1)/2;
			for (int filterX = dataSizeFilter.dataWidth - 1; filterX >= 0; filterX--) {
				[b]sum += c_Convolution3DFilter[filterX + filterY * dataSizeFilter.dataWidth + filterZ * dataSizeFilter.dataWidth * dataSizeFilter.dataHeight] * tex3D(convolution3DTexture, x + xOffset + 0.5f, y + yOffset + 0.5f, z + zOffset + 0.5f);[/b]

				xOffset++;
			}
			yOffset++;
		}
		zOffset++;
	}
    
	size_t offSet = x * sizeof(float) + y * d_FilterResponse.pitch + z * d_FilterResponse.pitch * dataSizeVolume.dataHeight;
	
	*(float *)((char *)d_FilterResponse.ptr + offSet) = sum;
}

}

But this fails in the line with bold since a CUDA error is thrown. However if I remove + zOffset from the texture call then it works but produces an incorrect result. What am I doing wrong?

The texture is initialized with the following code:

texture<float, 3, cudaReadModeElementType> convolution3DTexture;

cudaArray *convolution3DArray;

void InitializeAndBindConvolution3DTexture(cudaPitchedPtr d_Input, DataSize dataSize) {
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc();
CheckCUDAError();

// Set texture parameters
convolution3DTexture.addressMode[0] = cudaAddressModeClamp;
convolution3DTexture.addressMode[1] = cudaAddressModeClamp;
convolution3DTexture.addressMode[2] = cudaAddressModeClamp;
convolution3DTexture.normalized = false;
convolution3DTexture.filterMode = cudaFilterModeLinear;

// Allocate 3D array for modified volume (for fast interpolation)
cudaExtent textSize = make_cudaExtent(dataSize.dataWidth, dataSize.dataHeight, dataSize.dataDepth);
cudaMalloc3DArray(&convolution3DArray, &channelDesc, textSize);
CheckCUDAError();

// Copy modified volume data to the 3D array
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr   = d_Input;
copyParams.dstArray = convolution3DArray;
copyParams.extent   = textSize;
copyParams.kind     = cudaMemcpyDeviceToDevice;
cudaMemcpy3D(&copyParams);
CheckCUDAError();

// Bind the array to the 3D texture
cudaBindTextureToArray(convolution3DTexture, convolution3DArray, channelDesc);
CheckCUDAError();

}

/Danne

There are a number of things I don’t understand about your code :)

Why are x,y,z volatile?
The logic behind your definition of x,y,z isn’t obvious from what you’ve shown us. I assume they are all > 0

“But this fails in the line with bold since a CUDA error is thrown.”

Do you mean that the kernel throws an error and you assume it is this line because it has a CUDA call in it?

Why did you try removing zOffset? What happens if you remove xOffset or yOffset?