nvmlDeviceGetUtilizationRates fails with NVML_ERROR_UNKNOWN

The following call to nvmlDeviceGetUtilizationRates fails with an NVML_ERROR_UNKNOWN error (error code 999):

// initialize
    nvmlReturn_t rval = nvmlInit();
    if( rval != NVML_SUCCESS )
        printf("nvmlInit: %s\n", nvmlErrorString(rval));


    char szNvmlVersion[NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE];
    rval = nvmlSystemGetNVMLVersion( szNvmlVersion, sizeof szNvmlVersion );
    if( rval != NVML_SUCCESS )
        printf("nvmlSystemGetNVMLVersion: %s\n", nvmlErrorString(rval));

    char szDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
    rval = nvmlSystemGetDriverVersion( szDriverVersion, sizeof szDriverVersion );
    if( rval != NVML_SUCCESS )
        printf("nvmlSystemGetDriverVersion: %s\n", nvmlErrorString(rval));

    printf( "Initialized NVML v%s (NVidia GPU driver v%s)\n", szNvmlVersion, szDriverVersion );

    // build a list of NVML device handles
    unsigned int deviceCount = 0;
    rval = nvmlDeviceGetCount( &deviceCount );
    if( rval != NVML_SUCCESS )
        printf("nvmlDeviceGetCount: %s\n", nvmlErrorString(rval));

    nvmlDevice_t* hGPU = reinterpret_cast<nvmlDevice_t*>(malloc(deviceCount*(sizeof nvmlDevice_t)));
    for( unsigned int i=0; i<deviceCount; ++i )
    {
        rval = nvmlDeviceGetHandleByIndex( i, hGPU+i );
        if( rval != NVML_SUCCESS )
            printf("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(rval));
    }
    printf( "Found %d GPU handle(s)", deviceCount );
    for( unsigned int n=0; n<deviceCount; ++n ) printf( " 0x%016llx", reinterpret_cast<unsigned long long>(hGPU[n]) );
    printf( "\n");

    // this works
    unsigned int pct;
    rval = nvmlDeviceGetFanSpeed( hGPU[0], &pct );
    if( rval != NVML_SUCCESS )
        printf("nvmlDeviceGetFanSpeed: %s\n", nvmlErrorString(rval));
    printf( "Fan speed (%%): %u\n", pct );

    // this fails
    nvmlUtilization_t nvmlUtilization = { 0 };
    rval = nvmlDeviceGetUtilizationRates( hGPU[0], &nvmlUtilization );
    if( rval != NVML_SUCCESS )
        printf("nvmlDeviceGetUtilizationRates: %s\n", nvmlErrorString(rval));
    printf( "Utilization: GPU=%u gmem=%u", nvmlUtilization.gpu, nvmlUtilization.memory );

    // shutdown
    rval = nvmlShutdown();
    if( rval != NVML_SUCCESS )
        printf("nvmlShutdown: %s\n", nvmlErrorString(rval));

    // discard the list of GPU handles
    free( hGPU );

Output from the above:
Initialized NVML v7.353.90 (NVidia GPU driver v353.90)
Found 3 GPU handle(s) 0x000007fef2ce9a98 0x000007fef2cea308 0x000007fef2ceab78
Fan speed (%): 41
nvmlDeviceGetUtilizationRates: Unknown Error
Utilization: GPU=0 gmem=0

Details:

  • three NVIDIA K20c devices
  • Windows Server 2008 R2
  • Visual Studio 2013
  • CUDA 7.5
  • Graphics driver 353.90
  • NVML (from CUDA toolkit 7.5, downloaded October 9, 2015)
  • 64-bit build (32-bit build not tested)

A workaround or fix would be great!