Hi All,
We have the following issue with CUDA 4.2 and a GTX 680: if a large piece of memory is allocated by cudaMalloc(), not all of it is reported as returned to the free pool by a subsequent cudaFree() call. By “large piece” we mean something on the order of 1 GB; everything is fine if only 1 MB or so is allocated.
It doesn’t look like a big deal except that presumably it leads to memory fragmentation which is lethal for our code. Here is a simple snippet reproducing the problem. Any ideas what’s going on? Thanks!
#include <stdio.h>

#include <cuda.h>
#include <cuda_runtime.h>
// Empty kernel. Launched once from main() before the first memory query —
// presumably to force full CUDA context creation so that lazy runtime
// initialization does not skew the free-memory readings (TODO confirm intent).
__global__ void null()
{
}
/* Abort with a diagnostic on any failed CUDA runtime call. */
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(err_));                              \
            return 1;                                                       \
        }                                                                   \
    } while (0)

/*
 * Repro for the cudaMalloc/cudaFree "lost memory" observation:
 * measure free device memory before and after a large allocate/free
 * cycle and report how many bytes were not returned to the free pool.
 *
 * Returns 0 on success, 1 on any CUDA error.
 */
int main(int argc, char** argv)
{
    CUDA_CHECK(cudaSetDevice(0));

    /* Trivial launch to force full context creation, then synchronize so
     * lazy initialization cannot skew the first measurement. */
    null<<<1, 1>>>();
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    /* Runtime-API query (no need to mix in the driver API here).
     * Avoid naming a local `free` — it shadows stdlib free(). */
    size_t freeBefore = 0, totalBefore = 0;
    CUDA_CHECK(cudaMemGetInfo(&freeBefore, &totalBefore));

    /* Force size_t arithmetic; int math would overflow for sizes > 2 GiB. */
    const size_t bytes = (size_t)1024 * 1024 * 1024;  /* 1 GiB */
    int* d_data = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_data, bytes));
    CUDA_CHECK(cudaFree(d_data));

    size_t freeAfter = 0, totalAfter = 0;
    CUDA_CHECK(cudaMemGetInfo(&freeAfter, &totalAfter));

    /* size_t is unsigned: subtract in a signed type so a *gain* in free
     * memory prints as negative instead of wrapping, and use a format
     * specifier wide enough for the value (%d on size_t is wrong). */
    long long lost = (long long)freeBefore - (long long)freeAfter;
    printf("lost %lld bytes\n", lost);
    return 0;
}
In my case the output is:
err code = 0
err code = 0
lost 0 bytes (1MB chunk)
err code = 0
err code = 0
lost 3145728 bytes (1GB chunk)