I have a stack of 2d images. I want to use a 3D texture to store them and read them one by one in a kernel. But when I run the test .m file . The Matlab crashes. I do not know whether I missed something about 3D texture.
The CUDA code is just like this:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <cutil.h>
typedef unsigned int uint;
texture<float, 3, cudaReadModeElementType> tex; // 3D texture
cudaArray *d_volumeArray = 0;
/***************************************************************************
* Read**Kernel : read one layer in 3D texture
**************************************************************************/
__global__ void
ReadKernel(float *d_output, uint imageW, uint imageH, float w)
{
uint x = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
uint y = __umul24(blockIdx.y, blockDim.y) + threadIdx.y;
// read from 3D texture
float voxel = tex3D(tex, x, y, w);
if ((x < imageW) && (y < imageH)) {
uint i = __umul24(y, imageW) + x;
d_output[i] = voxel;
}
}
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
float* images = (float *)mxGetPr(prhs[0]);
int n_i = mxGetScalar(prhs[1]);
int width = mxGetScalar(prhs[2]);
int height = mxGetScalar(prhs[3]);
const cudaExtent volumeSize = make_cudaExtent( width, height, n_i); // size for images stack
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
CUDA_SAFE_CALL( cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize) );
// copy data to 3D array
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void*)images, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height);
copyParams.dstArray = d_volumeArray;
copyParams.extent = volumeSize;
copyParams.kind = cudaMemcpyHostToDevice;
CUDA_SAFE_CALL( cudaMemcpy3D(©Params) );
// set texture parameters
tex.normalized = false;
tex.filterMode = cudaFilterModePoint; //use cudaFilterModePoint filter mode to avoid fetching from different textures layers.
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.addressMode[2] = cudaAddressModeClamp;
// bind array to 3D texture
CUDA_SAFE_CALL(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
float* d_output = NULL;
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_output, (width*height*sizeof(float))));
float *h_odata = NULL;
/* get a pointer to the output */
const mwSize dims[]={width,height,n_i};
plhs[0] = mxCreateNumericArray(3,dims,mxDOUBLE_CLASS,mxREAL);
h_odata = (float *)mxGetPr(plhs[0]);
dim3 dimBlock1(8, 8, 1);
dim3 dimGrid1((width+ dimBlock1.x -1) / dimBlock1.x, (height+ dimBlock1.y -1) / dimBlock1.y, 1);
for (int i = 0; i < n_i; i++)
{
// read in 3D texture one layer to get one image
ReadKernel<<< dimGrid1, dimBlock1 >>>( d_output, width, height, i);
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUDA_SAFE_CALL( cudaMemcpy(h_odata+i*width*height*sizeof(float),d_output, width*height*sizeof(float),cudaMemcpyDeviceToHost)); // memory copy
}
// clear memory
CUDA_SAFE_CALL(cudaFree(d_output));
CUDA_SAFE_CALL(cudaFreeArray(d_volumeArray));
}
and the test .m code is just like this
m=imread('cameraman.tif');
m=imresize(m,[64 64]);
images(:,:,1) = m ; % the first image
images(:,:,2) = m ; % the second image
CC = test_3dtexture(images,2,64,64);
Could anyone help me? Thank you !!