I might be doing something wrong, but it appears that cudaMemGetInfo() doesn’t always return the correct amount of free memory. In the code below, I call cudaMemGetInfo() before and after cudaMalloc() and report the expected and actual memory allocated. I’m not surprised if a the amount allocated is a little bigger that what was requested, due to alignment restrictions, but here it often reports that 100 times the amount requested is allocated, or sometimes 0 bytes.
int main(void)
{
// the number of cudaMalloc() calls to make
const int n_alloc = 10;
cudaError_t err;
size_t free_bytes, total_bytes;
long free_bytes_before, free_bytes_after;
int *d_array[n_alloc];
// arbitrary allocation sizes
size_t bytes_to_alloc[n_alloc] = { 1689600, 112640, 600, 600, 600, 1800, 1800, 4, 4, 11232 };
err = cudaSetDevice(0);
for (int i = 0; i < n_alloc; i++)
{
// get the number of bytes available before allocation
deviceSynchronize();
free_bytes = getFreeBytes(2*i, &total_bytes);
free_bytes_before = (long) free_bytes;
if (!i) cout << "total bytes available = " << total_bytes << endl << endl;
// make sure we are allocating an integral number of ints
assert(!(bytes_to_alloc[i] % sizeof(int)));
err = cudaMalloc(&d_array[i], bytes_to_alloc[i]);
if (err != cudaSuccess)
{
cout << "cudaMalloc returned the error " << cudaGetErrorString(err) << endl;
exit(1);
}
// get the number of bytes available after allocation
deviceSynchronize();
free_bytes = getFreeBytes(2*i+1, &total_bytes);
free_bytes_after = (long) free_bytes;
// expected bytes that were to be allocated, and the actual amount that was allocated
long expected = (long) bytes_to_alloc[i];
long actual = free_bytes_before - free_bytes_after;
long diff = actual - expected;
cout << "Expected bytes = " << expected << " actual bytes = " << actual
<< " difference = " << diff << endl;
}
}
// -------------------------------------------------------------------------------------------
// getFreeBytes
// -------------------------------------------------------------------------------------------
size_t getFreeBytes(const int where, size_t *total_bytes)
{
size_t free_bytes;
cudaError_t err = cudaMemGetInfo(&free_bytes, total_bytes);
if (err != cudaSuccess)
{
cout << "getFreeBytes: call index " << where
<< ": cudaMemGetInfo returned the error: " << cudaGetErrorString(err) << endl;
exit(1);
}
return free_bytes;
}
// -------------------------------------------------------------------------------------------
// deviceSynchronize
// -------------------------------------------------------------------------------------------
void deviceSynchronize(void)
{
cudaError_t err = cudaDeviceSynchronize();
if (err != cudaSuccess)
{
cout << "cudaDeviceSynchronize returned the error " << cudaGetErrorString(err) << endl;
exit(1);
}
}
Here is the program output:
total bytes available = 3220373504
Expected bytes = 1689600 actual bytes = 1703936 difference = 14336
Expected bytes = 112640 actual bytes = 1048576 difference = 935936
Expected bytes = 600 actual bytes = 1048576 difference = 1047976
Expected bytes = 600 actual bytes = 0 difference = -600
Expected bytes = 600 actual bytes = 0 difference = -600
Expected bytes = 1800 actual bytes = 1048576 difference = 1046776
Expected bytes = 1800 actual bytes = 0 difference = -1800
Expected bytes = 4 actual bytes = 0 difference = -4
Expected bytes = 4 actual bytes = 0 difference = -4
Expected bytes = 11232 actual bytes = 1048576 difference = 1037344
I am using CUDA 5.0, driver 304.64 and a Tesla K10.G1.8GB card. This also happen in CUDA 4.2 with the 295.53 driver