Hi @all!
In my program, cudaGetDeviceProperties returns weird values in the last 6 fields of the device properties (see the listing below):
[codebox]/* getcuda.c
*/
#include “stdheader.h”
#ifndef GETENV
#define GETENV _STD_C_GETENV
#if IS_MSWIN
#define CUDA_LIBPATH “CUDA_BIN_PATH”
#if IS_64BIT
#define CUDA_DLL "cudart64.dll"
#define CUDA_DLL "cudart.dll"
#else /* UNIX, what else */
#include <dlfcn.h>
typedef void* HMODULE; /* make UNIX compatible with MSWin to avoid MS-VC complains */
#define CUDA_DLL “libcudart.so”
#ifndef DRIVER_TYPES_H
/*
 * Local stand-ins for the CUDA runtime types, compiled only when
 * DRIVER_TYPES_H is not defined, so this file builds without the CUDA
 * headers.
 *
 * NOTE(review): cudaGetDeviceProperties() fills this structure through a
 * pointer, so its layout has to match the cudaDeviceProp of the runtime
 * library that is actually loaded -- verify against the driver_types.h of
 * the installed toolkit.
 */
struct cudaDeviceProp
{
    char   name[256];                 /* device name string */
    size_t totalGlobalMem;            /* global memory, reported by main() in MB after /(1024*1024) */
    size_t sharedMemPerBlock;         /* shared memory per block, reported in KB after /1024 */
    int    regsPerBlock;
    int    warpSize;
    size_t memPitch;                  /* maximum memory pitch */
    int    maxThreadsPerBlock;
    int    maxThreadsDim[3];          /* maximum threads per block dimension */
    int    maxGridSize[3];            /* maximum grid size per dimension */
    size_t totalConstMem;             /* total constant memory */
    int    major;                     /* compute capability, major part */
    int    minor;                     /* compute capability, minor part */
    int    clockRate;                 /* reported by main() as KHz */
    size_t textureAlignment;
    int    deviceOverlap;
    int    multiProcessorCount;       /* multiprocessors on the device */
    int    kernelExecTimeoutEnabled;
    int    integrated;
    int    canMapHostMemory;
    int    computeMode;
    int    __cudaReserved[36];        /* padding reserved for later fields */
};
typedef int cudaError_t;              /* 0 is treated as success throughout this file */
#endif /* DRIVER_TYPES_H */
/*
 * Table of CUDA runtime entry points resolved at run time by getcuda().
 * Each function-pointer member carries the name of the library symbol it is
 * bound to; the CUDA_HOOK macro relies on that (it uses the member name both
 * as the symbol string and as the field to assign).
 */
typedef struct _CUDA
{
HMODULE handle; /* handle to the shared library; non-zero once getcuda() succeeded */
cudaError_t (*cudaGetDeviceProperties) (struct cudaDeviceProp *prop, int dev);
cudaError_t (*cudaGetLastError) (void);
const char *(*cudaGetErrorString) (cudaError_t err);
cudaError_t (*cudaMalloc) (void **devPtr, size_t count);
cudaError_t (*cudaFree) (void *devPtr);
cudaError_t (*cudaMemcpy) (void *dst, const void *src, size_t count, int kind);
/*
* .. add more function pointers here
* (and add the same member to the union in getcuda() plus a CUDA_HOOK line)
*/
int ndev; /* number of CUDA devices as reported by cudaGetDeviceCount() */
} CUDA;
/*
 * Symbol-lookup helper macros for getcuda().
 *
 * They are not self-contained; they expand inside a scope that provides
 *   handle - the loaded library handle,
 *   fp     - a union with a `void *vp` member and one function-pointer
 *            member per API name,
 *   cuda   - the CUDA table to fill (CUDA_HOOK only),
 * and whose enclosing function returns a pointer, because CUDA_LINKFP
 * executes `return NULL` when a symbol is missing.
 *
 * CUDA_GETFP(name)  : look the symbol up and store it in fp (may yield NULL).
 * CUDA_LINKFP(name) : CUDA_GETFP plus "not available" message and return NULL.
 * CUDA_HOOK(name)   : CUDA_LINKFP plus copy into cuda.name; the caller
 *                     supplies the trailing semicolon.
 */
#if IS_MSWIN
#define CUDA_GETFP(_name) *((FARPROC *)(&fp._name)) = GetProcAddress(handle,#_name);
#else
#define CUDA_GETFP(_name) fp.vp = dlsym(handle,#_name);
#endif
#define CUDA_LINKFP(_name) \
    CUDA_GETFP(_name)\
    if (!fp._name)\
    {\
        fputs("getcuda: function " #_name "() not available.\n",stderr);\
        return NULL;\
    }
#define CUDA_HOOK(_name) \
    CUDA_LINKFP(_name)\
    cuda._name = fp._name
/************************************************************
****************************/
C_FUNC_PREFIX const CUDA *getcuda(void)
/************************************************************
****************************/
{
static CUDA cuda = {0,0,0};
HMODULE handle;
union
{
void *vp;
cudaError_t (*cudaGetDeviceCount) (int *count);
cudaError_t (*cudaGetDeviceProperties) (struct cudaDeviceProp *prop, int dev);
cudaError_t (*cudaGetLastError) (void);
const char *(*cudaGetErrorString) (cudaError_t err);
cudaError_t (*cudaMalloc) (void **devPtr, size_t count);
cudaError_t (*cudaFree) (void *devPtr);
cudaError_t (*cudaMemcpy) (void *dst, const void *src, size_t count, int kind);
/*
* .. add more function pointers here
*/
} fp;
cudaError_t err;
int ndev = 0;
if (cuda.handle && cuda.ndev > 0) /* already loaded */
return &cuda;
#if IS_MSWIN
{
TCHAR *libpath;
TCHAR dllpath[4096];
libpath = getenv(TEXT(CUDA_LIBPATH));
if (!libpath)
{
fprintf(stderr,"getcuda: environment variable \"%s\" not defined.\n",CUDA_LIBPATH);
return NULL;
}
_snprintf(dllpath,countof(dllpath),TEXT(“%s\”) TEXT(CUDA_DLL),libpath);
handle = LoadLibrary(dllpath);
}
handle = dlopen(CUDA_DLL,RTLD_LAZY);
if (!handle)
{
fprintf(stderr,"getcuda: no CUDA library \"%s\".\n",CUDA_DLL);
return NULL;
}
CUDA_LINKFP(cudaGetDeviceCount)
err = fp.cudaGetDeviceCount(&ndev);
if (err)
{
fprintf
(
stderr,
"cudaGetDeviceCount: error #%d (%s).\n",
err,cuda.cudaGetErrorString(err)
);
return NULL;
}
if (ndev <= 0) /* no cuda hardware installed */
{
fprintf(stderr,"getcuda: no CUDA hardware (ndev=%d).\n",ndev);
return NULL;
}
CUDA_HOOK(cudaGetDeviceProperties);
CUDA_HOOK(cudaGetLastError);
CUDA_HOOK(cudaGetErrorString);
CUDA_HOOK(cudaMalloc);
CUDA_HOOK(cudaFree);
CUDA_HOOK(cudaMemcpy);
/*
* .. add more function pointers here
*/
/* success */
cuda.handle = handle;
cuda.ndev = ndev;
return &cuda;
}
/*
 * getcudadevicebyname - find the first CUDA device whose name contains a
 * given substring.
 *
 *   cuda    : table returned by getcuda(); NULL yields -1.
 *   devname : substring to look for in cudaDeviceProp.name.
 *             NULL or empty -> the value of environment variable CUDA_DEVICE
 *                              is used instead.
 *             "$ENVNAME"    -> the value of environment variable ENVNAME is
 *                              used instead.
 *
 * Returns the index of the first matching device, or -1 when no name could
 * be determined or no device matches.  Devices whose properties cannot be
 * queried are skipped.
 */
/****************************************************************************************/
C_FUNC_PREFIX int getcudadevicebyname(const CUDA *cuda, const char *devname)
/****************************************************************************************/
{
    struct cudaDeviceProp prop;
    int i,ndev;

    if (!cuda)
        return -1;

    if (!STRHASLEN(devname)) /* NULL or empty device name: read environment CUDA_DEVICE */
        devname = GETENV("CUDA_DEVICE");
    else if (devname[0] == '$') /* $ENVNAME: read environment ENVNAME */
        devname = GETENV(devname+1);

    if (STRHASLEN(devname))
    {
        ndev = cuda->ndev;
        for(i=0; i<ndev; i++)
            if (!cuda->cudaGetDeviceProperties(&prop,i) && strstr(prop.name,devname))
                return i;
    }
    return -1;
}
/************************************************************
****************************/
C_FUNC_PREFIX int getcudadevicebymemory(const CUDA *cuda)
/************************************************************
****************************/
{
struct cudaDeviceProp prop;
size_t maxm = 0;
int i,ndev,idev = -1;
if (!cuda)
return -1;
ndev = cuda->ndev;
for(i=0; i<ndev; i++)
if (!cuda->cudaGetDeviceProperties(&prop,i) && prop.totalGlobalMem > maxm)
{
idev = i;
maxm = prop.totalGlobalMem;
}
return idev;
}
/************************************************************
****************************/
#if 1
int main(void)
{
const CUDA *cuda = getcuda();
struct cudaDeviceProp prop;
int i,ndev;
if (!cuda)
{
puts("CUDA is not available.");
return 1;
}
printf(“CUDA hardware is available: %d devices.\n”,ndev=cuda->ndev);
for(i=0; i<ndev; i++)
{
cudaError_t err = cuda->cudaGetDeviceProperties(&prop,i);
if (err)
{
printf
(
"cudaGetDeviceProperties: error #%d (%s).\n",
err,cuda->cudaGetErrorString(err)
);
}
else
{
printf
(
"\nDevice %d: \"%s\"\n"
" Total global memory (MB) : %d\n"
" Shared memory per block (KB) : %d\n"
" Warp size : %d\n"
" Max. memory pitch : %d\n"
" Max. threads per block : %d\n"
" Max. threads dimension : %d x %d x %d\n"
" Max. grid size : %d x %d x %d\n"
" Total constant memory (KB) : %d\n"
" Compute capability : %d.%d\n"
" Clock rate (KHz) : %d\n"
" Texture alignment : %d\n"
" Device overlap : %d\n"
" Multiprocessors on device : %d\n"
" Kernel exec timeout enabled : %d\n"
" Integrated : %d\n"
" Can map host memory : %d\n"
" Compute mode : %d\n"
, i
, prop.name
, prop.totalGlobalMem/(1024*1024)
, prop.sharedMemPerBlock/1024
, prop.warpSize
, prop.memPitch
, prop.maxThreadsPerBlock
, prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]
, prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]
, prop.totalConstMem
, prop.major, prop.minor
, prop.clockRate
, prop.textureAlignment
, prop.deviceOverlap
, prop.multiProcessorCount
, prop.kernelExecTimeoutEnabled
, prop.integrated
, prop.canMapHostMemory
, prop.computeMode
);
}
}
printf(“\nBest CUDA devicebymemory = %d.\n”,getcudadevicebymemory(cuda));
printf(“\nBest CUDA devicebyname = %d.\n”,getcudadevicebyname(cuda,NULL));
return 0;
}
[/codebox]