Hello.
I am unsuccessfully trying to create and use Surface objects with memory allocated with cudaMallocManaged.
Here an example:
#include <cuda_runtime.h>
#include <stdio.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
#define ELEMS 4
// just print 'hello' + elements in surf1
__global__ void print_kernel(cudaSurfaceObject_t surf1){
if(threadIdx.x == 0){
printf("hello\n");
for(int x=0; x<ELEMS; x++){
printf("%d\n", surf1Dread<int>(surf1, x*sizeof(int)));
}
}
}
int main(){
int surfhost1[] = {10, 20, 30, 40};
int* surfshared1;
gpuErrchk(cudaMallocManaged(&surfshared1, 4*sizeof(int)));
// copy to shared memory region
for(int i=0; i<4; i++){
surfshared1[i] = surfhost1[i];
}
// 1D integer surface
cudaSurfaceObject_t surfdev1;
cudaChannelFormatDesc channelDesc1 = cudaCreateChannelDesc<int>();
// Resource descriptor
cudaResourceDesc res_desc1;
res_desc1.resType = cudaResourceTypeLinear;
res_desc1.res.linear.devPtr = surfshared1;
res_desc1.res.linear.desc = channelDesc1;
res_desc1.res.linear.sizeInBytes = 4*sizeof(int);
gpuErrchk(cudaMemPrefetchAsync(surfshared1, 4*sizeof(int), 0));
// is it necessary or not?
gpuErrchk(cudaDeviceSynchronize());
// the program fails here
gpuErrchk(cudaCreateSurfaceObject(&surfdev1, &res_desc1));
print_kernel<<<1,1>>>(surfdev1);
gpuErrchk(cudaDeviceSynchronize());
cudaDestroySurfaceObject(surfdev1);
cudaFree(surfshared1);
}
The code above fails when cudaCreateSurfaceObject is called with the following error output “GPUassert: invalid argument test_cuda_2.cu 57”.
I am using NVIDIA CUDA toolkit V9.1.85 on Ubuntu 18.04. My graphic card is a Geforce 1060.
To compile the above example I use:
nvcc test_cuda_2.cu --gpu-architecture=sm_61 -o test_cuda_2
Thank you in advance!