Can somebody please shed some light on the following situation? malloc_test.cu seems to work when invoked on its own, but I have been having weird termination errors in some other codes, so I need to know whether this might be a real bug.
malfunct@Centauri:~/Code/examachine/cuda/test$ nvcc -arch sm_20 malloc_test.cu -o malloc_test
malfunct@Centauri:~/Code/examachine/cuda/test$ cuda-memcheck ./malloc_test
========= CUDA-MEMCHECK
========= Invalid __global__ write of size 1
========= at 0x00000050 in mallocTest
========= by thread (0,0,0) in block (0,0)
========= Address 0xfd0831fe20 is out of bounds
=========
========= ERROR SUMMARY: 1 error
malfunct@Centauri:~/Code/examachine/cuda/test$ cat malloc_test.cu //-*-c++-*-x
#include <stdio.h>
#include <stdlib.h>
__device__ __host__ void mallocTest2()
{
char* ptr = (char*)malloc(123);
}
__global__ void mallocTest()
{
char* ptr = (char*)malloc(123);
ptr[0]='a';
ptr[1]='b';
ptr[2]=0;
printf("%s\n",ptr);
mallocTest2();
printf("Thread %d got pointer: %p\n", threadIdx.x, ptr);
}
int main()
{
// Set a heap size of 128 megabytes. Note that this must
// be done before any kernel is launched.
cudaThreadSetLimit(cudaLimitMallocHeapSize, 128*1024*1024);
mallocTest<<<1, 5>>>();
cudaThreadSynchronize();
return 0;
}
malfunct@Centauri:~/Code/examachine/cuda/test$ ./malloc_test
ab
ab
ab
ab
ab
Thread 0 got pointer: 0xfd0831fe20
Thread 1 got pointer: 0xfd0831fec0
Thread 2 got pointer: 0xfd0831ff60
Thread 3 got pointer: 0xfd083197c0
Thread 4 got pointer: 0xfd08319720