Hi everyone,
I am working on a CUDA application in which I would like to read the results from a cuFFT plan execution using a texture. To do so I have already worked out that I need to do one of two things:
- Write the output from cuFFT to the texture via a surface or
- do a 3D memory copy using a pitched pointer.
I have tried both but have gotten neither of them to work.
For the first method, I’ve gone with the following approach:
- initialization of textures and underlying arrays:
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<SSfloat>();
cudaMalloc3DArray( &d_Ux_A, &channelDesc, make_cudaExtent( m_N, m_M, m_L ), cudaArraySurfaceLoadStore );
cudaMalloc3DArray( &d_Uy_A, &channelDesc, make_cudaExtent( m_N, m_M, m_L ), cudaArraySurfaceLoadStore );
cudaMalloc3DArray( &d_Uz_A, &channelDesc, make_cudaExtent( m_N, m_M, m_L ), cudaArraySurfaceLoadStore );
struct cudaResourceDesc resDesc;
memset( &resDesc, 0, sizeof( resDesc ) );
resDesc.resType = cudaResourceTypeArray;
struct cudaTextureDesc texDesc;
memset( &texDesc, 0, sizeof( texDesc ) );
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.addressMode[2] = cudaAddressModeWrap;
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = true;
resDesc.res.array.array = d_Ux_A;
cudaCreateTextureObject( &d_Ux_T, &resDesc, &texDesc, NULL );
cudaCreateSurfaceObject( &d_Ux_S, &resDesc );
resDesc.res.array.array = d_Uy_A;
cudaCreateTextureObject( &d_Uy_T, &resDesc, &texDesc, NULL );
cudaCreateSurfaceObject( &d_Uy_S, &resDesc );
resDesc.res.array.array = d_Uz_A;
cudaCreateTextureObject( &d_Uz_T, &resDesc, &texDesc, NULL );
cudaCreateSurfaceObject( &d_Uz_S, &resDesc );
- Updating the surfaces from a kernel (here Ux, Uy and Uz are the real-valued cuFFT outputs, and Ux_S, Uy_S and Uz_S are the surfaces):
surf3Dwrite<float>( Ux[i], Ux_S, x * sizeof( float ), y, z );
surf3Dwrite<float>( Uy[i], Uy_S, x * sizeof( float ), y, z );
surf3Dwrite<float>( Uz[i], Uz_S, x * sizeof( float ), y, z );
- Reading from the (apparently not updated?) textures - this happens in a different kernel:
const float cux = tex3D<float>( Ux_T, sampleX, sampleY, sampleZ );
const float cuy = tex3D<float>( Uy_T, sampleX, sampleY, sampleZ );
const float cuz = tex3D<float>( Uz_T, sampleX, sampleY, sampleZ );
The second method was done as shown below. Unfortunately, it shows a similar lack of results:
cudaMemcpy3DParms copyParams = { 0 };
copyParams.kind = cudaMemcpyDeviceToDevice;
copyParams.extent = make_cudaExtent( m_N, m_M, m_L );
cudaPitchedPtr xPtr = make_cudaPitchedPtr( d_Ux_r, m_N * sizeof( float ), m_N, m_M );
cudaPitchedPtr yPtr = make_cudaPitchedPtr( d_Uy_r, m_N * sizeof( float ), m_N, m_M );
cudaPitchedPtr zPtr = make_cudaPitchedPtr( d_Uz_r, m_N * sizeof( float ), m_N, m_M );
copyParams.dstArray = d_Ux_A;
copyParams.srcPtr = xPtr;
cudaMemcpy3D( &copyParams );
copyParams.dstArray = d_Uy_A;
copyParams.srcPtr = yPtr;
cudaMemcpy3D( &copyParams );
copyParams.dstArray = d_Uz_A;
copyParams.srcPtr = zPtr;
cudaMemcpy3D( &copyParams );
cudaDeviceSynchronize();
// Reading from kernel happens after this, same way as above
Is there something obvious I am missing here? I’ve gone through the documentation quite a few times now and I feel like I have done it correctly.