I have a stack of 2d images. I want to use a 3D texture to store them and read them one by one in a kernel. But when I run the test .m file . The Matlab crashes. I do not know whether I missed something about 3D texture.
The CUDA code is just like this:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <cutil.h>
typedef unsigned int uint;
texture<float, 3, cudaReadModeElementType> tex; // 3D texture
cudaArray *d_volumeArray = 0;
/***************************************************************************
* Read**Kernel : read one layer in 3D texture
**************************************************************************/
__global__ void
ReadKernel(float *d_output, uint imageW, uint imageH, float w)
{
uint x = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
uint y = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;
// read from 3D texture
float voxel = tex3D(tex, x, y, w);
if ((x < imageW) && (y < imageH)) {
uint i = __umul24(y, imageW) + x;
d_output[i] = voxel;
}
}
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
float* images = (float *)mxGetPr(prhs[0]);
int n_i = mxGetScalar(prhs[1]);
int width = mxGetScalar(prhs[2]);
int height = mxGetScalar(prhs[3]);
const cudaExtent volumeSize = make_cudaExtent( width, height, n_i); // size for images stack
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
CUDA_SAFE_CALL( cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize) );
// copy data to 3D array
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void*)images, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height);
copyParams.dstArray = d_volumeArray;
copyParams.extent = volumeSize;
copyParams.kind = cudaMemcpyHostToDevice;
CUDA_SAFE_CALL( cudaMemcpy3D(©Params) );
// set texture parameters
tex.normalized = false;
tex.filterMode = cudaFilterModePoint; //use cudaFilterModePoint filter mode to avoid fetching from different textures layers.
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.addressMode[2] = cudaAddressModeClamp;
// bind array to 3D texture
CUDA_SAFE_CALL(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
float* d_output = NULL;
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_output, (width*height*sizeof(float))));
float *h_odata = NULL;
/* get a pointer to the output */
const mwSize dims[]={width,height,n_i};
plhs[0] = mxCreateNumericArray(3,dims,mxDOUBLE_CLASS,mxREAL);
h_odata = (float *)mxGetPr(plhs[0]);
dim3 dimBlock1(8, 8, 1);
dim3 dimGrid1((width+ dimBlock1.x -1) / dimBlock1.x, (height+ dimBlock1.y -1) / dimBlock1.y, 1);
for (int i = 0; i < n_i; i++)
{
// read in 3D texture one layer to get one image
ReadKernel<<< dimGrid1, dimBlock1 >>>( d_output, width, height, i);
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUDA_SAFE_CALL( cudaMemcpy(h_odata+i*width*height*sizeof(float),d_output, width*height*sizeof(float),cudaMemcpyDeviceToHost)); // memory copy
}
// clear memory
CUDA_SAFE_CALL(cudaFree(d_output));
CUDA_SAFE_CALL(cudaFreeArray(d_volumeArray));
}
/code]
and the test .m code is just like this
[code]
m=imread('cameraman.tif');
m=imresize(m,[64 64]);
images(:,:,1) = m ; % the first image
images(:,:,2) = m ; % the second image
CC = test_3dtexture(images,2,64,64);
I checked line by line for many times but still could not find the error. Someone told me to add the unbind texture operation before cudaFree, but still crashes. Could anyone help me? Thank you!!!