cudaSurfaceType3D

I am having a problem with using 3D surfaces. I create a 3D array and can bind it to a 3D texture read the values that I placed into the array from within a kernel using the texture. When I bind the same array to a 3D surface attempts to read from the surface produce 0 and when run with the cuda debugger exceptions occur but no additional information is provided. I am unable to find example code that use cudaSurfaceType3D at Nvidia or elsewhere. Please let me know if you have had trouble or success with cudaSurfaceType3D. My GPU is a 580 with driver 306.94, and I am using Cuda 5.0 with Visual Studio 2010.

Thank you

Could you paste your sample code here?

Thank you for your interest in helping. In the code below I create a 3D image that consists of an array of 2D images. I set all pixels to the same value. I use Nsight 3.0 to check the value of variables within a kernel. I transfer the 3D image to a cuda array and bind it to both a 3D surface and texture. Within a kernel the texture produces the value that I set the image to. When I read from the surface I get zero. If you need more information please let me know.

Regards,
Jay

texture<float, 3, cudaReadModeElementType> reconTex;

surface<void, cudaSurfaceType3D> reconSurf;

global static void cudaConebeamTestSurfaceKernel(int cols, int rows, int slices)
{
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;

if(col >= cols || row >= rows) {return;}

for(int slice = 0; slice < slices; slice++)
{
	float cv = 0.0f;
	surf3Dread(&cv, reconSurf, col * sizeof(float), row, slice, cudaBoundaryModeZero);
	cv += 5;
	surf3Dwrite(cv, reconSurf, col * sizeof(float), row, slice, cudaBoundaryModeZero);
}

}

global static void cudaConebeamTestTextureKernel(int cols, int rows, int slices)
{
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;

if(col >= cols || row >= rows) {return;}

for(int slice = 0; slice < slices; slice++)
{
	float cv = tex3D(reconTex, col + 0.5, row + 0.5, slice + 0.5);	

	float rr = cv;
}

}

void cudaSurfaceMemoryTest()
{
int cols = 256, rows = 256, slices = 256;

// Create a 3D image which consists of an array of 2D images (slices).
float** sliceArray = new float*[slices];
for(int s = 0; s < slices; s++) {sliceArray[s] = new float[cols * rows];}

for(int s = 0; s < slices; s++) 
{
	for(int r = 0; r < rows; r++) 
	{
		for(int c = 0; c < cols; c++) 
		{
			sliceArray[s][c + r * cols] = 5.444;
		}
	}
}

// Setup cuda array.
cudaArray* imgArray;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc();
cudaExtent extent = make_cudaExtent(cols, rows, slices); 
cudaMalloc3DArray(&imgArray, &channelDesc, extent, cudaArraySurfaceLoadStore);


for(long s = 0; s < slices; s++)
{
	cudaMemcpy3DParms params = {0};
	params.srcPtr = make_cudaPitchedPtr((void*)sliceArray[s], cols * sizeof(float), cols, rows);
	params.dstArray = imgArray;
	params.extent = make_cudaExtent(cols, rows, 1);
	params.dstPos.x = 0;
	params.dstPos.y = 0;
	params.dstPos.z = s;
	params.kind = cudaMemcpyHostToDevice;
	cudaMemcpy3D(&params);
}

cudaGetChannelDesc(&channelDesc, imgArray);
cudaBindSurfaceToArray(reconSurf, imgArray, channelDesc);
//CudaUtil::checkForCudaError("");

reconTex.addressMode[0] = cudaAddressModeClamp; 
reconTex.addressMode[1] = cudaAddressModeClamp; 
reconTex.addressMode[2] = cudaAddressModeClamp;
reconTex.filterMode = cudaFilterModeLinear; 
reconTex.normalized = false; 
cudaBindTextureToArray(reconTex, imgArray, channelDesc);
//CudaUtil::checkForCudaError("");


// Set block and grid dimensions
dim3 dimBlock, dimGrid; 

dimBlock.x = 16; 
dimBlock.y = 16; 
dimBlock.z = 1; 

dimGrid.x = (uint)ceil((float)cols / (float)dimBlock.x);
dimGrid.y = (uint)ceil((float)rows / (float)dimBlock.y);
dimGrid.z = 1;

// Run kernels
for(long i = 0; i < 1; i++)
{
	printf("

%d", i);

	cudaConebeamTestTextureKernel(cols, rows, slices);
	cudaThreadSynchronize();
	//CudaUtil::checkForCudaError("A");

	cudaConebeamTestSurfaceKernel(cols, rows, slices);
	cudaThreadSynchronize();
	//CudaUtil::checkForCudaError("B");
}

cudaFreeArray(imgArray);

// Free image memory
for(int s = 0; s < slices; s++) {if(sliceArray != NULL) delete[] sliceArray[s];}
if(sliceArray != NULL) {delete[] sliceArray;}

}