Maybe I’m missing something, but I’m having trouble with the following code. I’m trying to allocate as large a chunk of memory as possible at the beginning of my program using cuBLAS, to avoid memory fragmentation. I try to determine the biggest available chunk of contiguous memory by decreasing the requested block size, starting at 2 GB and going down towards 0 bytes. The desired result is an allocation of somewhere between 80% and 90% of the total available memory. However, on my 64-bit Windows 7 machine this fails. I think that all subsequent calls to cublasAlloc fail once one call has failed because I requested too much memory.
I have tried driver versions 296.10 and the current beta version 301.24, both with the same results.
I should mention that the code runs perfectly fine on the same machine under Gentoo Linux x86_64, using nvidia-drivers-295.40.
Can anyone reproduce these results? Or maybe tell me a setup where this code works as desired?
I’m using a GTX 550 Ti.
Cheers
#include <iostream>
#include <limits>
#include <cublas.h>
using namespace std;
/**
 * Reports a cuBLAS status code on standard error.
 *
 * Writes the symbolic name of `s` followed by a newline (and flush) to
 * std::cerr. Nothing is printed when `s` is CUBLAS_STATUS_SUCCESS. Any code
 * not listed below is reported as "CUBLAS_UNKNOWN_ERROR".
 *
 * @param s status value to report.
 */
inline void checkStatus(cublasStatus s) {
    // Success is silent.
    if (s == CUBLAS_STATUS_SUCCESS) {
        return;
    }

    // Map the status code to its printable name; unknown codes keep the fallback.
    const char* label = "CUBLAS_UNKNOWN_ERROR";
    switch (s) {
        case CUBLAS_STATUS_ALLOC_FAILED:
            label = "CUBLAS_STATUS_ALLOC_FAILED";
            break;
        case CUBLAS_STATUS_ARCH_MISMATCH:
            label = "CUBLAS_STATUS_ARCH_MISMATCH";
            break;
        case CUBLAS_STATUS_EXECUTION_FAILED:
            label = "CUBLAS_STATUS_EXECUTION_FAILED";
            break;
        case CUBLAS_STATUS_INTERNAL_ERROR:
            label = "CUBLAS_STATUS_INTERNAL_ERROR";
            break;
        case CUBLAS_STATUS_INVALID_VALUE:
            label = "CUBLAS_STATUS_INVALID_VALUE";
            break;
        case CUBLAS_STATUS_MAPPING_ERROR:
            label = "CUBLAS_STATUS_MAPPING_ERROR";
            break;
        case CUBLAS_STATUS_NOT_INITIALIZED:
            label = "CUBLAS_STATUS_NOT_INITIALIZED";
            break;
        default:
            break;
    }

    std::cerr << label << std::endl;
}
/**
 * Probes how much device memory can be obtained through the legacy cuBLAS
 * allocator.
 *
 * Steps:
 *   1. Initialise cuBLAS; abort if that fails.
 *   2. Allocate and release a 100 MB block as a sanity check.
 *   3. Starting at INT_MAX bytes, request one block and shrink the request by
 *      10 MB after every CUBLAS_STATUS_ALLOC_FAILED until a request succeeds,
 *      a different error is returned, or the size drops to zero or below.
 *   4. Report the outcome, release the block and shut cuBLAS down.
 *
 * @return 0 if a block was obtained in step 3, 1 otherwise.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    const int kMiB = 1024 * 1024;
    const int kStep = 10 * kMiB;

    cublasStatus result = cublasInit();
    if (result != CUBLAS_STATUS_SUCCESS) {
        // Without an initialised library every later call is meaningless.
        checkStatus(result);
        return 1;
    }

    // Sanity check: a modest allocation that is expected to succeed.
    void* ptr = NULL;
    int n = 100 * kMiB;
    result = cublasAlloc(n, 1, &ptr);
    if (result == CUBLAS_STATUS_SUCCESS && ptr)
        std::cout << "memory allocation of " << n / kMiB << " MB successful." << std::endl;
    else
        checkStatus(result);
    if (ptr) {
        cublasFree(ptr);
        // Forget the released address so it can never be mistaken for a live
        // allocation (and freed a second time) further down.
        ptr = NULL;
    }

    // Search downwards from the largest size representable in an int.
    n = std::numeric_limits<int>::max();
    while (n > 0) {
        ptr = NULL;  // do not rely on cublasAlloc clearing the pointer on failure
        result = cublasAlloc(n, 1, &ptr);
        if (result != CUBLAS_STATUS_ALLOC_FAILED)
            break;
        std::cerr << "memory allocation of " << n / kMiB << " MB failed." << std::endl;
        // NOTE(review): on some driver/OS combinations a failed allocation
        // reportedly leaves a sticky CUDA error that makes every later call
        // fail as well. Calling cudaGetLastError() (from cuda_runtime.h) here
        // is the suggested way to clear it - confirm and add the include if
        // needed.
        n -= kStep;
    }

    const bool allocated =
        (result == CUBLAS_STATUS_SUCCESS) && (n > 0) && (ptr != NULL);
    if (allocated) {
        std::cout << "memory allocation of " << n / kMiB << " MB successful." << std::endl;
    } else {
        checkStatus(result);
        std::cerr << "Memory allocation failed!" << std::endl;
    }

    if (ptr) {
        cublasFree(ptr);
        ptr = NULL;
    }

    result = cublasShutdown();
    checkStatus(result);

    return allocated ? 0 : 1;
}
Try calling `cudaGetLastError()` after the failing allocation to reset the error state. Otherwise, all subsequent CUDA calls will keep returning the error code of the failed call.