Hi
I am trying to port a library to CUDA, and I have found a problem that shows up when I run under cuda-memcheck but not in normal use. I tried to isolate it, and here is some code that reproduces the error:
/* Small payload record: the object that _allocate creates on the device
 * and that type2::pt is cast back to. */
typedef struct
{
    int foo; /* filled in by _allocate from its `foo` argument */
} type1;
/* Container record holding an integer plus an untyped pointer to a
 * separately allocated payload. */
typedef struct
{
    int   bar; /* plain integer field */
    void *pt;  /* untyped payload pointer: may refer to a type1 struct or
                  to some other kind of struct */
} type2;
/*
 * Kernel: initialise *t2 and attach a freshly allocated type1 payload to it.
 *
 * t2  - device pointer to the type2 record to fill in
 * foo - value stored in the new payload's `foo` field
 * bar - value stored in t2->bar
 *
 * Every thread that executes this kernel writes *t2 and performs its own
 * allocation, so it is meant to be launched with a single thread (main uses
 * <<<1, 1>>>).
 *
 * The payload comes from the in-kernel malloc(), which returns a null
 * pointer when the device heap cannot satisfy the request. In that case
 * t2->pt is left null and the payload is not written, instead of
 * dereferencing the null pointer. Memory obtained here must be released
 * with the in-kernel free() (see _free), not with cudaFree().
 */
__global__ void _allocate(type2* t2, int foo, int bar)
{
    t2->bar = bar;

    type1* t1 = (type1*)malloc(sizeof(type1));
    t2->pt = t1; /* publish the result even on failure so callers can test it */

    if (!t1) {
        /* Device heap exhausted: nothing to initialise. */
        return;
    }

    t1->foo = foo;
}
/*
 * Kernel: release the payload attached to *t2 by _allocate.
 *
 * t2 - device pointer to the type2 record whose `pt` payload is freed
 *
 * Uses the in-kernel free(), which matches the in-kernel malloc() used by
 * _allocate. After freeing, t2->pt is reset to null so the record no
 * longer holds a dangling pointer and a repeated launch does not free the
 * same block twice. Meant to be launched with a single thread (main uses
 * <<<1, 1>>>), since every executing thread would free the same pointer.
 */
__global__ void _free(type2* t2)
{
    type1* t1 = (type1*)t2->pt;
    free(t1);
    t2->pt = 0; /* clear the stale pointer */
}
/*
 * Host driver for the reproduction case.
 *
 * Allocates one type2 record with cudaMalloc, lets _allocate attach a
 * payload to it inside a kernel, lets _free release that payload inside a
 * second kernel, and finally releases the type2 record itself.
 *
 * Returns 0 when every CUDA call and both kernels succeed, 1 otherwise.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    type2* t2 = 0;
    int status = 0;

    /* type2 is allocated in the main program while the rest in kernels */
    if (cudaMalloc((void**)&t2, sizeof(type2)) != cudaSuccess) {
        return 1;
    }

    _allocate<<<1, 1>>>(t2, 0, 0);
    /* cudaGetLastError reports launch failures; the synchronize call
       reports faults raised while the kernel was running. */
    if (cudaGetLastError() != cudaSuccess ||
        cudaDeviceSynchronize() != cudaSuccess) {
        status = 1;
    }

    /* Only release the payload if the allocation kernel ran cleanly. */
    if (status == 0) {
        _free<<<1, 1>>>(t2);
        if (cudaGetLastError() != cudaSuccess ||
            cudaDeviceSynchronize() != cudaSuccess) {
            status = 1;
        }
    }

    /* Release the host-allocated record in every case. */
    if (cudaFree(t2) != cudaSuccess) {
        status = 1;
    }

    return status;
}
It is compiled with: nvcc -arch=sm_21 test.cu
When I run it under cuda-memcheck to check for memory errors, I get the following:
========= CUDA-MEMCHECK
========= Invalid __global__ write of size 4
========= at 0x00000058 in _allocate
========= by thread (0,0,0) in block (0,0)
========= Address 0xfd009ff920 is out of bounds
=========
========= ERROR SUMMARY: 1 error
Is this a real problem, or just a false positive from cuda-memcheck?
Cheers