Hi Everyone.
I have troubles reading a 2D layered surface object. I did a simple test that uploads a 2X2X2 float cube and tries to print the contained values. But I get multiple errors at runtime.
The code:
// test to understand how 2DLayered cudaSurfaceObject works
// A 2x2x2 cube of float values is created and then transferred to the GPU
// thread 0 should print all the cube values
#include <cuda_runtime.h>
#include <helper_cuda.h>
__global__ void print_kernel(cudaSurfaceObject_t surf)
{
// simply cycle the 2x2x2 cube represented by surf and print it with the index
// only thread 0 runs
if(threadIdx.x == 0){
int i, j, k;
for(i = 0; i<2; i++){
for(j = 0; j<2; j++){
for(k = 0; k < 2; k++){
float res;
//read surface element and then print it
surf2DLayeredread<float>(&res, surf, j, k, i);
printf(" layer:%d u:%d v:%d val: %f\n", i, j, k, res);
}
}
}
}
}
int main(int argc, char **argv)
{
// memory to be used on the gpu
float cube_array[8] = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8};
// create a channel of a single float
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaArray *dev_cu_array;
cudaSurfaceObject_t surf;
cudaResourceDesc res_desc;
// extent of the2X2X2 float cube
cudaExtent extent = make_cudaExtent(2 * sizeof(float), 2, 2);
//allocate memory on the layered array.
checkCudaErrors(cudaMalloc3DArray(&dev_cu_array,
&channelDesc,
extent,
cudaArrayLayered));
//copy 8 floats from cube_array t0 dev_cu_array
checkCudaErrors(cudaMemcpyToArray(dev_cu_array,
0,
0,
cube_array,
8*sizeof(float),
cudaMemcpyHostToDevice));
// Resource is an array
res_desc.resType = cudaResourceTypeArray;
res_desc.res.array.array = dev_cu_array;
checkCudaErrors(cudaCreateSurfaceObject(&surf, &res_desc));
// only one thread is needed
print_kernel<<<1,1>>>(surf);
// destroy memory
checkCudaErrors(cudaDestroySurfaceObject(surf));
checkCudaErrors(cudaFreeArray(dev_cu_array)); // why this fails?!
}
With this code I got the following output:
layer:0 u:0 v:0 val: 1.100000
layer:0 u:0 v:1 val: 0.000000
CUDA error at simpleTexture.cu:66 code=74(cudaErrorMisalignedAddress) "cudaFreeArray(dev_cu_array)"
So, there are two issues:
- the device code is unable to read values with layer/u coordinates different from 0
- host code fails to deallocate dev_cu_array. I was very surprised about this. Am I corrupting the memory or what?
I’m using a Geforce GTX 1060 (with compute_60 ) with Ubuntu 18.04 and toolkit V9.1.85
Thank you very much for your help!