This is the entire program:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
/**
 * Minimal device-side smoke test: every launched thread prints one greeting
 * line through device printf.
 *
 * The kernel takes no arguments and does no indexing, so the number of lines
 * printed equals the number of threads in the launch.
 */
__global__ void helloFromGPU(void) {
    printf("Hello World from GPU!\n");
}
/**
 * Launches helloFromGPU with a single block of a single thread, waits for it
 * to finish, and tears the device context down.
 *
 * Returns 0 when the launch, the kernel execution and the device reset all
 * succeed, and 1 otherwise (the failing step is reported on stderr).
 */
int main(void)
{
    helloFromGPU<<<1, 1>>>();

    // A kernel launch returns nothing itself: configuration errors are only
    // visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "helloFromGPU launch failed: %s\n",
                cudaGetErrorString(status));
    } else {
        // Block until the kernel has finished so that faults raised while it
        // was running are reported here instead of being silently dropped.
        status = cudaDeviceSynchronize();
        if (status != cudaSuccess) {
            fprintf(stderr, "helloFromGPU execution failed: %s\n",
                    cudaGetErrorString(status));
        }
    }

    // Always reset, even after a failure, so the context is destroyed
    // explicitly before the process exits.
    const cudaError_t resetStatus = cudaDeviceReset();
    if (resetStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed: %s\n",
                cudaGetErrorString(resetStatus));
    }

    return (status == cudaSuccess && resetStatus == cudaSuccess) ? 0 : 1;
}
but I always get this warning when I profile it with nvprof:
D:\Programing\CudaTest\x64\Debug>nvprof CudaTest
==14844== NVPROF is profiling process 14844, command: CudaTest
Hello World from GPU!
==14844== Profiling application: CudaTest
==14844== Warning: Found 41 invalid records in the result.
==14844== Warning: This can happen if device ran out of memory or if a device kernel was stopped due to an assertion.
==14844== Profiling result:
Type Time(%) Time Calls Avg Min Max Name
GPU activities: 100.00% 45.215us 1 45.215us 45.215us 45.215us helloFromGPU(void)
API calls: 77.18% 121.10ms 1 121.10ms 121.10ms 121.10ms cudaLaunch
22.52% 35.337ms 1 35.337ms 35.337ms 35.337ms cudaDeviceReset
0.23% 363.82us 55 6.6140us 255ns 166.32us cuDeviceGetAttribute
0.06% 90.954us 1 90.954us 90.954us 90.954us cuDeviceGetName
0.01% 9.4530us 1 9.4530us 9.4530us 9.4530us cuDeviceTotalMem
0.00% 6.3870us 1 6.3870us 6.3870us 6.3870us cudaConfigureCall
0.00% 2.0430us 2 1.0210us 255ns 1.7880us cuDeviceGetCount
0.00% 767ns 1 767ns 767ns 767ns cuDeviceGet
D:\Programing\CudaTest\x64\Debug>nvprof CudaTest
==13116== NVPROF is profiling process 13116, command: CudaTest
Hello World from GPU!
==13116== Profiling application: CudaTest
==13116== Warning: Found 46 invalid records in the result.
==13116== Warning: This can happen if device ran out of memory or if a device kernel was stopped due to an assertion.
==13116== Profiling result:
Type Time(%) Time Calls Avg Min Max Name
GPU activities: 100.00% 45.183us 1 45.183us 45.183us 45.183us helloFromGPU(void)
API calls: 71.63% 99.948ms 1 99.948ms 99.948ms 99.948ms cudaLaunch
28.05% 39.142ms 1 39.142ms 39.142ms 39.142ms cudaDeviceReset
0.24% 340.82us 48 7.1000us 255ns 161.73us cuDeviceGetAttribute
0.06% 87.377us 1 87.377us 87.377us 87.377us cuDeviceGetName
0.00% 6.1310us 1 6.1310us 6.1310us 6.1310us cudaConfigureCall
0.00% 5.8760us 1 5.8760us 5.8760us 5.8760us cuDeviceTotalMem
0.00% 2.3000us 3 766ns 255ns 1.7890us cuDeviceGetCount
0.00% 766ns 2 383ns 255ns 511ns cuDeviceGet
D:\Programing\CudaTest\x64\Debug>nvprof CudaTest
==4692== NVPROF is profiling process 4692, command: CudaTest
Hello World from GPU!
==4692== Profiling application: CudaTest
==4692== Warning: Found 23 invalid records in the result.
==4692== Warning: This can happen if device ran out of memory or if a device kernel was stopped due to an assertion.
==4692== Profiling result:
Type Time(%) Time Calls Avg Min Max Name
GPU activities: 100.00% 44.991us 1 44.991us 44.991us 44.991us helloFromGPU(void)
API calls: 68.18% 86.218ms 1 86.218ms 86.218ms 86.218ms cudaLaunch
31.45% 39.769ms 1 39.769ms 39.769ms 39.769ms cudaDeviceReset
0.29% 366.12us 73 5.0150us 255ns 165.56us cuDeviceGetAttribute
0.07% 93.253us 1 93.253us 93.253us 93.253us cuDeviceGetName
0.01% 6.8980us 1 6.8980us 6.8980us 6.8980us cuDeviceTotalMem
0.00% 6.1320us 1 6.1320us 6.1320us 6.1320us cudaConfigureCall
0.00% 2.2990us 2 1.1490us 255ns 2.0440us cuDeviceGetCount
0.00% 766ns 1 766ns 766ns 766ns cuDeviceGet
Even this file, which doesn't launch any kernel, gives me the same warning:
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include "common.h"
/**
 * Queries the properties of CUDA device 0 and prints a short summary
 * (name, multiprocessor count, memory sizes, register/thread/warp limits).
 *
 * argc and argv are accepted but not used.
 *
 * Returns EXIT_SUCCESS when the query and the final device reset succeed,
 * EXIT_FAILURE otherwise (the failing call is reported on stderr).
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    int iDev = 0;
    cudaDeviceProp iProp;

    // Without this check a failed query would leave iProp uninitialized and
    // every line below would print garbage.
    cudaError_t status = cudaGetDeviceProperties(&iProp, iDev);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceProperties(device %d) failed: %s\n",
                iDev, cudaGetErrorString(status));
        return EXIT_FAILURE;
    }

    printf("Device %d: %s\n", iDev, iProp.name);
    printf("Number of multiprocessors: %d\n", iProp.multiProcessorCount);
    printf("Total amount of constant memory: %4.2f KB\n",
           iProp.totalConstMem / 1024.0);
    printf("Total amount of shared memory per block: %4.2f KB\n",
           iProp.sharedMemPerBlock / 1024.0);
    printf("Total number of registers available per block: %d\n",
           iProp.regsPerBlock);
    printf("Warp size: %d\n", iProp.warpSize);
    printf("Maximum number of threads per block: %d\n", iProp.maxThreadsPerBlock);
    printf("Maximum number of threads per multiprocessor : %d\n",
           iProp.maxThreadsPerMultiProcessor);
    // Divide by the warp size the device reports rather than a literal 32,
    // so this line always agrees with the "Warp size" line above.
    printf("Maximum number of warps per multiprocessor: %d\n",
           iProp.maxThreadsPerMultiProcessor / iProp.warpSize);

    status = cudaDeviceReset();
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed: %s\n",
                cudaGetErrorString(status));
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
D:\Programing\CudaTest\x64\Debug>nvprof CudaTest
==7204== NVPROF is profiling process 7204, command: CudaTest
Device 0: GeForce GTX 1080
Number of multiprocessors: 20
Total amount of constant memory: 64.00 KB
Total amount of shared memory per block: 48.00 KB
Total number of registers available per block: 65536
Warp size: 32
Maximum number of threads per block: 1024
Maximum number of threads per multiprocessor : 2048
Maximum number of warps per multiprocessor: 64
==7204== Profiling application: CudaTest
==7204== Warning: Found 52 invalid records in the result.
==7204== Warning: This can happen if device ran out of memory or if a device kernel was stopped due to an assertion.
==7204== Profiling result:
No kernels were profiled.
Type Time(%) Time Calls Avg Min Max Name
API calls: 49.97% 478.27us 42 11.387us 255ns 259.32us cuDeviceGetAttribute
34.57% 330.86us 1 330.86us 330.86us 330.86us cudaGetDeviceProperties
9.69% 92.743us 1 92.743us 92.743us 92.743us cuDeviceGetName
4.72% 45.221us 1 45.221us 45.221us 45.221us cudaDeviceReset
0.64% 6.1310us 1 6.1310us 6.1310us 6.1310us cuDeviceTotalMem
0.27% 2.5550us 3 851ns 255ns 1.7890us cuDeviceGetCount
0.13% 1.2780us 2 639ns 256ns 1.0220us cuDeviceGet
And when I try to profile the gld_efficiency metric (on another CUDA file), I get no result because of this warning.
This is the project: https://drive.google.com/open?id=18oYuKoFEG9yDlv9zTrkVly0-1BVAdTzP
I have no idea how to solve this now.
Any help is appreciated.
Thanks.
PS. I build with Visual Studio Community 2017 by clicking Build → Rebuild Solution, and I run nvprof on the .exe that this build produces. I have tried restarting the computer, reinstalling Visual Studio, reinstalling CUDA, and making the GeForce Experience driver the same version as the one shipped with CUDA, but the result is still the same. Two days ago I did not have this problem and nvprof was working fine; now every time I run nvprof it shows this warning.