Hi,
I am trying to write CUDA code for processing some 3D images. All the examples in SDK work with only one 3D image. Is it possible to use two 3D images?
When I run the code below, I get following crash report:
Unhandled exception at 0x166f1f50 in Spine.exe: 0xC0000005: Access violation reading location 0x0000000008acb000.
0x0000000008acb000 does not correspond to any of the local variables.
#include <cutil_inline.h>
texture<float,3,cudaReadModeElementType> texIn, texGm;
cudaArray *arrIn, *arrGv, *arrGm;
__shared__ cudaExtent size;
__global__ void kernel(const float eps, float *gv, float *l, float *h)
{
int idx = blockDim.x * blockIdx.x + threadIdx.x;
if (idx>=size.width*size.height*size.depth)
return; //over size
int z=idx/(size.width*size.height);
int y=(idx%(size.width*size.height))/size.width;
int x=idx%size.width;
l[idx]=tex3D(texIn, x, y, z); //for start, just copy input image to outputs
h[idx]=tex3D(texGm, x, y, z);
}
//in is 3D scalar field, gm is gradient magnitude, gv is gradient vector
//space for l and h is allocated previously
void calcLHcuda(int xsize, int ysize, int zsize, const float eps, float *gm, float *gv, float *in, float *l, float *h)
{
size.width=xsize;
size.height=ysize;
size.depth=zsize;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cutilSafeCall( cudaMalloc3DArray ( &arrIn, &channelDesc, size ));
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void*)in, xsize*sizeof(float), xsize, ysize);
copyParams.dstArray = arrIn;
copyParams.extent = size;
copyParams.kind = cudaMemcpyHostToDevice;
cutilSafeCall( cudaMemcpy3D(©Params) );
cutilCheckMsg("Failed to allocate first 3D image");
//texIn.addressMode[0] = cudaAddressModeClamp; //Clamp is default
texIn.filterMode = cudaFilterModeLinear;
texIn.normalized = false;
cutilSafeCall( cudaBindTextureToArray(texIn, arrIn) );
cutilSafeCall( cudaMalloc3DArray ( &arrGm, &channelDesc, size ));
copyParams.srcPtr = make_cudaPitchedPtr((void*)gm, xsize*sizeof(float), xsize, ysize);
copyParams.dstArray = arrGm;
cutilSafeCall( cudaMemcpy3D(©Params) );
cutilCheckMsg("Failed to allocate second 3D image");
texGm.filterMode = cudaFilterModeLinear;
texGm.normalized = false;
cutilSafeCall( cudaBindTextureToArray(texGm, arrGm) );
kernel<<< (xsize*ysize*zsize/32+1), 32 >>>(eps, gv, l, h);
cutilSafeCall(cudaFreeArray(arrIn));
cutilSafeCall(cudaFreeArray(arrGm));
}
calcLHcuda is the function which is called from the rest of my C++ code.
Can anyone shed some light on this, or give some suggestinos?
Regards,
Dženan