I’m trying to load the driver API at runtime using LoadLibrary/getProcAdress. I’m using Windows 10 Pro, Visual Studio 2019, Cuda 9.2, and a RTX 2080TI.
#include "C:/CUDA/9.2/include/cuda.h"
#include <iostream>
#include <Windows.h>
#define LOAD_CUDA_FUNCTION(NAME) _##NAME = (NAME##_p)GetProcAddress(hinst, #NAME);
typedef CUresult(*cuCtxCreate_p)(CUcontext*, unsigned int, CUdevice);
typedef CUresult(*cuCtxDestroy_p)(CUcontext);
typedef CUresult(*cuDeviceGet_p)(CUdevice*, int);
typedef CUresult(*cuInit_p)(unsigned int);
typedef CUresult(*cuMemGetInfo_p)(size_t*, size_t*);
cuCtxCreate_p _cuCtxCreate;
cuCtxDestroy_p _cuCtxDestroy;
cuDeviceGet_p _cuDeviceGet;
cuInit_p _cuInit;
cuMemGetInfo_p _cuMemGetInfo;
int main(int argc, char** argv)
{
HINSTANCE hinst = LoadLibrary(TEXT("nvcuda.dll"));
LOAD_CUDA_FUNCTION(cuCtxCreate);
LOAD_CUDA_FUNCTION(cuCtxDestroy);
LOAD_CUDA_FUNCTION(cuDeviceGet);
LOAD_CUDA_FUNCTION(cuInit);
LOAD_CUDA_FUNCTION(cuMemGetInfo);
size_t free, total;
CUdevice dev;
CUcontext ctx;
_cuInit(0);
_cuDeviceGet(&dev, 0);
_cuCtxCreate(&ctx, 0, dev);
_cuMemGetInfo(&free, &total);
std::cout << total / (1024 * 1024) << "\n";
_cuCtxDestroy(ctx);
FreeLibrary(hinst);
return 0;
}
When I run the program, I get no errors, but only the lower 32 bit of total and free are being changed, the upper 32 bit remain 0xcccccccc in debug mode, i.e. total = 0xccccccccdaffffff.
Is there anything wrong with the calling convention, 32/64 bit version of the DLL, …?
bye,
loki