Thank you for your answer. Here are some details.
3D array and texture initialization:
// Allocate the 3D CUDA array (W x H x Z, one float per element) that backs the texture.
cudaExtent extent = make_cudaExtent(W, H, Z);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaArray* cu_array = 0;
checkCuda( cudaMalloc3DArray(&cu_array, &channelDesc, extent), pExec );

// Describe the device-to-device copy from the linear buffer d_out into the 3D array.
cudaMemcpy3DParms copyParams = {0};
// d_out is described as tightly packed: the row pitch is width * sizeof(float) bytes.
copyParams.srcPtr = make_cudaPitchedPtr((void*)d_out, extent.width * sizeof(float), extent.width, extent.height);
copyParams.dstArray = cu_array;
copyParams.extent = extent;
copyParams.kind = cudaMemcpyDeviceToDevice;
// Pass the ADDRESS of the descriptor (the post showed a garbled "©Params" here).
checkCuda( cudaMemcpy3D(&copyParams), pExec );

// Set texture parameters.
tex.normalized = false;                 // texture coordinates are NOT normalized
tex.filterMode = cudaFilterModeLinear;  // linear interpolation
// NOTE(review): CUDA documents wrap/mirror addressing as supported only with
// normalized coordinates; with normalized == false these three settings may not
// actually wrap. Confirm the intended out-of-range behaviour against the kernel.
tex.addressMode[0] = cudaAddressModeWrap;
tex.addressMode[1] = cudaAddressModeWrap;
tex.addressMode[2] = cudaAddressModeWrap;

// Bind the texture reference to the 3D array.
checkCuda( cudaBindTextureToArray(tex, cu_array, channelDesc), pExec );
And here is the for loop in main:
// For each tilt angle: clear d_out, run the transform kernel (which reads its input
// from the texture), run the re-projection kernel, then copy d_out into the array
// described by copyParams so the next iteration sees the updated data.
for (int a = 0; a < nbangle; ++a)
{
    // Negated tilt angle scaled by PI/180 (presumably degrees -> radians; confirm
    // the unit of buffAngTilt).
    float theta_r = -buffAngTilt[a] * (PI/180);

    // Re-initialise the output buffer before each pass.
    checkCuda( cudaMemset(d_out, 0, size_f), pExec );

    // 1st kernel: the input data is read from tex, the result is written to d_out.
    transformKernel<<<dimGrid, dimBlock>>>( d_out, // output
                                            W,
                                            H,
                                            Z,
                                            theta_r );

    // cudaGetLastError() reports launch errors only; faults raised while the
    // kernel runs surface at a later synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(pExec, " :CheckMsg() CUDA error : : (%d) %s.\n", (int)err, cudaGetErrorString( err ) );
    }

    // 2nd kernel
    REprojectionKernel<<<dimGrid_REproj, dimBlock_REproj>>>( d_vol_proj, // in
                                                             d_out,      // out
                                                             W,
                                                             H,
                                                             Z,
                                                             a );
    err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(pExec, " :CheckMsg() CUDA error : : (%d) %s.\n", (int)err, cudaGetErrorString( err ) );
    }

    // Update the array content from d_out. Pass the ADDRESS of the descriptor
    // (the post showed a garbled "©Params" here).
    checkCuda( cudaMemcpy3D(&copyParams), pExec );
}
françois