I’ve observed a strange issue: when I initialize a shared memory struct variable in a function called from the main kernel, the execution runs fine, but cuda-memcheck throws errors about invalid global access. The initialization works fine and the value is readable, unless I run cuda-memcheck, which crashes it with the error. Below is a simple example that reproduces the problem.
In my (much bigger) code I’m trying to move the initialization into its own function, but the cuda-memcheck errors don’t inspire confidence that this is safe.
I use CUDA 5.0 on a CentOS 6.3 with the driver version 304.54.
Please let me know what you think.
//main.cpp
include “gpu.h”
int main(int argc, char* argv){
run_kernel();
return 0;
}
//gpu.h
#ifndef GPU_H_
#define GPU_H_

// Launches the compute kernel and waits for it to finish (defined in gpu.cu).
void run_kernel();

// Model state shared between the host-side header and the device code.
struct cuda_model{
    int size;  // written by the kernel's initialization code
};

#endif /* GPU_H_ */
//gpu.cu
#include <stdio.h>
#include "gpu.h"

// Selects how compute() initializes the model:
//   non-zero -> through the init() helper (pointer to the shared variable),
//   zero     -> by writing model.size directly inside the kernel.
// NOTE(review): the cuda-memcheck report in this post points at a write
// inside init(), which is only reached when FUNCTION is non-zero.
#define FUNCTION 0

// One cuda_model instance per thread block, placed in shared memory.
__device__ __shared__ cuda_model model;
// Initializes the model pointed to by `model`.
//
// Thread 0 of the block writes size = 5; the barrier afterwards makes every
// thread of the block wait until that write has happened. Because of the
// barrier, this function must be called by all threads of the block.
//
// NOTE(review): the caller passes the address of a __shared__ variable.
// When building for compute capability 1.x the compiler may be unable to
// tell which memory space a pointer parameter refers to and then treats it
// as a global-memory pointer -- that would match the "invalid global write"
// cuda-memcheck reports. Presumably building with -arch=sm_20 or newer
// (generic addressing) avoids this; confirm against the actual build flags.
__device__ void init(cuda_model* model){
    if (threadIdx.x == 0){
        model->size = 5;
    }
    __syncthreads();
}
// Kernel that initializes the block's shared `model`.
//
// FUNCTION is a compile-time switch: non-zero routes the initialization
// through init(), zero performs the same write inline. Both paths end with
// a block-wide barrier so that any code added after the initialization sees
// model.size on every thread, whichever path is selected.
__global__ void compute(){
    if (FUNCTION){
        init(&model);
    }
    else{
        if (threadIdx.x == 0) model.size = 5;
        // Mirror the barrier that init() performs on the other path.
        __syncthreads();
    }
}
// Launches compute() with one block of one thread and waits for completion.
//
// Launch-configuration errors are picked up right after the launch with
// cudaGetLastError(); faults raised while the kernel runs surface from
// cudaDeviceSynchronize(). Either kind is reported on stderr rather than
// being silently dropped. The function still returns normally in all cases.
__host__ void run_kernel(){
    compute<<<1,1>>>();

    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        fprintf(stderr, "compute launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    err = cudaDeviceSynchronize();
    if (err != cudaSuccess){
        fprintf(stderr, "compute execution failed: %s\n", cudaGetErrorString(err));
    }
}
[sasha@gpudev Debug]$ cuda-memcheck ./Test
========= CUDA-MEMCHECK
========= Invalid global write of size 4
========= at 0x00000088 in /home/sasha/cuda-workspace/Test/Debug/../gpu.cu:11:init(cuda_model*)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x01000000 is out of bounds
========= Device Frame:/home/sasha/cuda-workspace/Test/Debug/../gpu.cu:19:compute(void) (compute(void) : 0x100)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/usr/lib64/libcuda.so (cuLaunchKernel + 0x3dc) [0xc9edc]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 [0x11d54]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 (cudaLaunch + 0x182) [0x38152]
========= Host Frame:./Test [0x92a]
========= Host Frame:./Test [0x9b9]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1ecdd]
========= Host Frame:./Test [0x749]
========= Program hit error 4 on CUDA API call to cudaDeviceSynchronize
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so [0x26a180]
========= Host Frame:/usr/local/cuda-5.0/lib64/libcudart.so.5.0 (cudaDeviceSynchronize + 0x1dd) [0x412dd]
========= Host Frame:./Test [0x9b9]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xfd) [0x1ecdd]
========= Host Frame:./Test [0x749]
========= ERROR SUMMARY: 2 errors