cudaGetDeviceProperties - wrong results

Hi @all!

In my program, cudaGetDeviceProperties returns implausible values for the last six properties it reports (see the listing below):

[codebox]

/* getcuda.c





*/

#include "stdheader.h"

/*
 * Platform configuration for locating the CUDA runtime shared library.
 *
 * GETENV       - environment lookup used by the device-selection helpers;
 *                defaults to _STD_C_GETENV (declared outside this section).
 * CUDA_LIBPATH - (MS-Windows only) name of the environment variable that
 *                holds the directory containing the runtime DLL.
 * CUDA_DLL     - file name of the runtime library to load.
 */
#ifndef GETENV
#define GETENV _STD_C_GETENV
#endif

#if IS_MSWIN

#define CUDA_LIBPATH "CUDA_BIN_PATH"

#if IS_64BIT
  #define CUDA_DLL   "cudart64.dll"
#else
  #define CUDA_DLL   "cudart.dll"
#endif

#else /* UNIX, what else */

#include <dlfcn.h>

typedef void* HMODULE; /* make UNIX compatible with MSWin to avoid MS-VC complaints */

#define CUDA_DLL "libcudart.so"

#endif

/*
 * Local declaration of the CUDA runtime's device-property record, used when
 * the toolkit header is not included.  It is skipped when either
 * DRIVER_TYPES_H or __DRIVER_TYPES_H__ (the guard used by NVIDIA's
 * driver_types.h) is already defined, so the real definition wins.
 *
 * The record is filled in by the dynamically loaded cudaGetDeviceProperties(),
 * therefore member order, member types and total size must not be changed
 * here.
 * NOTE(review): the layout has to match the driver_types.h of the runtime
 * library that is actually loaded; a different runtime version may use a
 * different layout - verify against the installed toolkit.
 */
#if !defined(DRIVER_TYPES_H) && !defined(__DRIVER_TYPES_H__)

struct cudaDeviceProp
{
  char     name[256];                /* device name string */
  size_t   totalGlobalMem;           /* total global memory, bytes */
  size_t   sharedMemPerBlock;        /* shared memory per block, bytes */
  int      regsPerBlock;             /* registers per block */
  int      warpSize;                 /* warp size */
  size_t   memPitch;                 /* max. memory pitch */
  int      maxThreadsPerBlock;       /* max. threads per block */
  int      maxThreadsDim[3];         /* max. threads dimension (x,y,z) */
  int      maxGridSize[3];           /* max. grid size (x,y,z) */
  size_t   totalConstMem;            /* total constant memory, bytes */
  int      major;                    /* compute capability, major part */
  int      minor;                    /* compute capability, minor part */
  int      clockRate;                /* clock rate, kHz */
  size_t   textureAlignment;         /* texture alignment */
  int      deviceOverlap;            /* device overlap flag */
  int      multiProcessorCount;      /* multiprocessors on device */
  int      kernelExecTimeoutEnabled; /* kernel exec timeout enabled flag */
  int      integrated;               /* integrated-device flag */
  int      canMapHostMemory;         /* can-map-host-memory flag */
  int      computeMode;              /* compute mode */
  int      __cudaReserved[36];       /* reserved padding */
};

#endif

/* Status code returned by the resolved runtime functions; the callers in
 * this file treat any non-zero value as an error. */
typedef int cudaError_t;

/*
 * Table of CUDA runtime entry points resolved at run time from the shared
 * library, plus the library handle and the device count.
 * Member order is significant: the table is initialised positionally.
 *
 * NOTE(review): the pointers carry no calling-convention attribute; if the
 * runtime exports use a non-default convention on some platform (e.g.
 * 32-bit MS-Windows), these declarations would need it too - confirm.
 */
typedef struct _CUDA
{
  /* handle to the shared library */
  HMODULE       handle;

  /* resolved runtime functions */
  cudaError_t (*cudaGetDeviceProperties)(struct cudaDeviceProp *props, int device);
  cudaError_t (*cudaGetLastError)(void);
  const char *(*cudaGetErrorString)(cudaError_t code);
  cudaError_t (*cudaMalloc)(void **dev_ptr, size_t nbytes);
  cudaError_t (*cudaFree)(void *dev_ptr);
  cudaError_t (*cudaMemcpy)(void *to, const void *from, size_t nbytes, int kind);

  /*
   * .. add more function pointers here
   */

  /* number of cuda devices */
  int           ndev;
} CUDA;

/*
 * Symbol-resolution helpers.  All three expect the following names to be in
 * scope at the point of use: `handle` (the loaded library), `fp` (a union
 * with a `vp` member and one function-pointer member per symbol) and, for
 * CUDA_HOOK, `cuda` (the CUDA table).
 *
 * CUDA_GETFP(name)  - look up symbol `name` in `handle` and store the
 *                     result in `fp`.
 * CUDA_LINKFP(name) - CUDA_GETFP, then print a message to stderr and
 *                     `return NULL` from the enclosing function when the
 *                     symbol was not found.  Expands to complete statements;
 *                     no trailing semicolon is required.
 * CUDA_HOOK(name)   - CUDA_LINKFP, then copy the pointer into `cuda.name`.
 *                     The caller supplies the terminating semicolon.
 *
 * Continuation lines must follow each backslash directly; a blank line
 * after a backslash ends the macro definition.
 */
#if IS_MSWIN
#define CUDA_GETFP(_name) *((FARPROC *)(&fp._name)) = GetProcAddress(handle,#_name);
#else
#define CUDA_GETFP(_name) fp.vp = dlsym(handle,#_name);
#endif

#define CUDA_LINKFP(_name) \
CUDA_GETFP(_name)\
if (!fp._name)\
{\
  fputs("getcuda: function " #_name "() not available.\n",stderr);\
  return NULL;\
}

#define CUDA_HOOK(_name) \
CUDA_LINKFP(_name)\
cuda._name = fp._name

/************************************************************

****************************/

/*
 * getcuda - load the CUDA runtime library and resolve its entry points.
 *
 * On the first successful call the shared library named by CUDA_DLL is
 * loaded (on MS-Windows from the directory given by the CUDA_LIBPATH
 * environment variable), the device count is queried and the function
 * pointers of the static CUDA table are filled in.  Later calls return the
 * same table without reloading.
 *
 * Returns a pointer to the static table, or NULL when the library or one of
 * the required symbols is missing, the device count query fails, or no
 * device is reported.  A diagnostic is written to stderr in each of these
 * cases.
 */
C_FUNC_PREFIX const CUDA *getcuda(void)
{
  static CUDA cuda = {0,0,0};
  HMODULE handle;
  union
  {
    void         *vp;
    cudaError_t (*cudaGetDeviceCount)      (int *count);
    cudaError_t (*cudaGetDeviceProperties) (struct cudaDeviceProp *prop, int dev);
    cudaError_t (*cudaGetLastError)        (void);
    const char *(*cudaGetErrorString)      (cudaError_t err);
    cudaError_t (*cudaMalloc)              (void **devPtr, size_t count);
    cudaError_t (*cudaFree)                (void *devPtr);
    cudaError_t (*cudaMemcpy)              (void *dst, const void *src, size_t count, int kind);
    /*
     * .. add more function pointers here
     */
  } fp;
  cudaError_t err;
  int ndev = 0;

  if (cuda.handle && cuda.ndev > 0) /* already loaded */
    return &cuda;

#if IS_MSWIN
  {
    TCHAR *libpath;
    TCHAR dllpath[4096];

    libpath = getenv(TEXT(CUDA_LIBPATH));
    if (!libpath)
    {
      fprintf(stderr,"getcuda: environment variable \"%s\" not defined.\n",CUDA_LIBPATH);
      return NULL;
    }

    /* "<libpath>\<dll>": the path separator must be written as an escaped backslash */
    _snprintf(dllpath,countof(dllpath),TEXT("%s\\") TEXT(CUDA_DLL),libpath);
    dllpath[countof(dllpath)-1] = 0; /* _snprintf does not terminate on truncation */
    handle = LoadLibrary(dllpath);
  }
#else
  handle = dlopen(CUDA_DLL,RTLD_LAZY);
#endif

  if (!handle)
  {
    fprintf(stderr,"getcuda: no CUDA library \"%s\".\n",CUDA_DLL);
    return NULL;
  }

  /*
   * Resolve cudaGetErrorString before anything else: the error report for
   * cudaGetDeviceCount below calls it through the table, so the pointer
   * must not still be NULL at that point.
   */
  CUDA_HOOK(cudaGetErrorString);

  CUDA_LINKFP(cudaGetDeviceCount)
  err = fp.cudaGetDeviceCount(&ndev);
  if (err)
  {
    fprintf
    (
       stderr,
       "cudaGetDeviceCount: error #%d (%s).\n",
       err,cuda.cudaGetErrorString(err)
    );
    return NULL;
  }

  if (ndev <= 0) /* no cuda hardware installed */
  {
    fprintf(stderr,"getcuda: no CUDA hardware (ndev=%d).\n",ndev);
    return NULL;
  }

  CUDA_HOOK(cudaGetDeviceProperties);
  CUDA_HOOK(cudaGetLastError);
  CUDA_HOOK(cudaMalloc);
  CUDA_HOOK(cudaFree);
  CUDA_HOOK(cudaMemcpy);
  /*
   * .. add more function pointers here
   */

  /* success: only now mark the table as loaded */
  cuda.handle = handle;
  cuda.ndev = ndev;
  return &cuda;
}

/************************************************************

****************************/

/*
 * getcudadevicebyname - find the first device whose name contains a string.
 *
 * cuda    : table returned by getcuda(); NULL yields -1.
 * devname : substring to look for in the device name.
 *           - NULL or empty: the value of the environment variable
 *             CUDA_DEVICE is used instead.
 *           - "$ENVNAME":    the value of environment variable ENVNAME is
 *             used instead.
 *
 * Returns the index of the first device whose properties could be read and
 * whose name contains the string, or -1 when there is no search string or
 * no device matches.
 */
C_FUNC_PREFIX int getcudadevicebyname(const CUDA *cuda, const char *devname)
{
  struct cudaDeviceProp prop;
  int i,ndev;

  if (!cuda)
    return -1;

  if (!STRHASLEN(devname)) /* NULL or empty name: read environment CUDA_DEVICE */
    devname = GETENV("CUDA_DEVICE");
  else if (devname[0] == '$') /* $ENVNAME: read environment ENVNAME */
    devname = GETENV(devname+1);

  /* the environment lookup may have produced nothing; then nothing matches */
  if (STRHASLEN(devname))
  {
    ndev = cuda->ndev;
    for(i=0; i<ndev; i++)
       if (!cuda->cudaGetDeviceProperties(&prop,i) && strstr(prop.name,devname))
          return i;
  }

  return -1;
}

/************************************************************

****************************/

C_FUNC_PREFIX int getcudadevicebymemory(const CUDA *cuda)

/************************************************************

****************************/

{

struct cudaDeviceProp prop;

size_t maxm = 0;

int i,ndev,idev = -1;

if (!cuda)

  return -1;

ndev = cuda->ndev;

for(i=0; i<ndev; i++)

  if (!cuda->cudaGetDeviceProperties(&prop,i) && prop.totalGlobalMem > maxm)

  {

     idev = i;

     maxm = prop.totalGlobalMem;

  }

return idev;

}

/************************************************************

****************************/

#if 1

int main(void)

{

const CUDA *cuda = getcuda();

struct cudaDeviceProp prop;

int i,ndev;

if (!cuda)

{

  puts("CUDA is not available.");

  return 1;

}

printf(“CUDA hardware is available: %d devices.\n”,ndev=cuda->ndev);

for(i=0; i<ndev; i++)

{

  cudaError_t err = cuda->cudaGetDeviceProperties(&prop,i);

  if (err)

  {

     printf

     (

        "cudaGetDeviceProperties: error #%d (%s).\n",

        err,cuda->cudaGetErrorString(err)

     );

  }

  else

  {

     printf

     (

        "\nDevice %d: \"%s\"\n"

        "   Total global memory (MB)     : %d\n"

        "   Shared memory per block (KB) : %d\n"

        "   Warp size                    : %d\n"

        "   Max. memory pitch            : %d\n"

        "   Max. threads per block       : %d\n"

        "   Max. threads dimension       : %d x %d x %d\n"

        "   Max. grid size               : %d x %d x %d\n"

        "   Total constant memory (KB)   : %d\n"

        "   Compute capability           : %d.%d\n"

        "   Clock rate (KHz)             : %d\n"

        "   Texture alignment            : %d\n"

        "   Device overlap               : %d\n"

        "   Multiprocessors on device    : %d\n"

        "   Kernel exec timeout enabled  : %d\n"

        "   Integrated                   : %d\n"

        "   Can map host memory          : %d\n"

        "   Compute mode                 : %d\n"

        , i

        , prop.name

        , prop.totalGlobalMem/(1024*1024)

        , prop.sharedMemPerBlock/1024

        , prop.warpSize

        , prop.memPitch

        , prop.maxThreadsPerBlock

        , prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]

        , prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]

        , prop.totalConstMem

        , prop.major, prop.minor

        , prop.clockRate

        , prop.textureAlignment

        , prop.deviceOverlap

        , prop.multiProcessorCount

        , prop.kernelExecTimeoutEnabled

        , prop.integrated

        , prop.canMapHostMemory

        , prop.computeMode

     );

  }

}

printf("\nBest CUDA devicebymemory = %d.\n",getcudadevicebymemory(cuda));

printf("\nBest CUDA devicebyname = %d.\n",getcudadevicebyname(cuda,NULL));

return 0;

}

#endif

[/codebox]