Hi there,
I'm a new CUDA developer.
Is there an elegant way to collect GPU crash reports?
On the CPU side there are many mature toolkits, such as Google Breakpad, so after we ship a PC application to our customers we can find out why it crashed by collecting crash dumps and logs.
But in CUDA, I found that some memory errors are not caught by the debugger and produce no logs.
For example:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
// Per-thread element sum used as a memory-fault example.
//
// Each thread, indexed only by threadIdx.x (so a single 1-D block is assumed),
// reads a[] and b[] at (threadIdx.x + 1000000), adds the two values plus its
// own thread index, stores the result in c[threadIdx.x], and prints the stored
// value with device-side printf.
//
// No bounds check is performed: a and b must hold at least
// (blockDim.x + 1000000) ints and c at least blockDim.x ints, otherwise the
// accesses are out of bounds.
__global__ void addKernel(int *c, const int *a, const int *b)
{
    const int tid = threadIdx.x;
    // Source offset is 1000000 elements beyond this thread's own slot.
    const int src = tid + 1000000;

    c[tid] = a[src] + b[src] + threadIdx.x;
    printf("%d\n", c[tid]);
}
// Reports a failed CUDA call on stderr as "<what>: <description> (<enum name>)".
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s: %s (%s)\n", what,
            cudaGetErrorString(status), cudaGetErrorName(status));
    return false;
}

// Prints basic properties of device 0, allocates three small device buffers,
// launches addKernel on one block of 10 threads, and reports any launch-time
// or execution-time CUDA error on stderr.
//
// Returns 0 when every CUDA call succeeded, 1 otherwise.
int main()
{
    const int kNumThreads = 10;
    const size_t kBytes = kNumThreads * sizeof(int);

    cudaDeviceProp prop;
    if (!cudaOk(cudaGetDeviceProperties(&prop, 0), "cudaGetDeviceProperties failed!")) {
        return 1;
    }
    printf("%s\n", prop.name);
    printf("prop.major = %d\n", prop.major);
    printf("prop.minor = %d\n", prop.minor);
    printf("prop.managedMemory = %d\n", prop.managedMemory);

    // Start from NULL so the cleanup below is valid on every path:
    // cudaFree(NULL) is a no-op that returns cudaSuccess.
    int *a = NULL;
    int *b = NULL;
    int *c = NULL;

    // Allocate real device memory; the kernel must never receive
    // uninitialized host-side pointer values.
    bool ok = cudaOk(cudaMalloc((void **)&a, kBytes), "cudaMalloc(a) failed!")
           && cudaOk(cudaMalloc((void **)&b, kBytes), "cudaMalloc(b) failed!")
           && cudaOk(cudaMalloc((void **)&c, kBytes), "cudaMalloc(c) failed!")
           && cudaOk(cudaMemset(a, 0, kBytes), "cudaMemset(a) failed!")
           && cudaOk(cudaMemset(b, 0, kBytes), "cudaMemset(b) failed!");

    if (ok) {
        addKernel<<<1, kNumThreads>>>(c, a, b);

        // cudaGetLastError() only catches launch/configuration errors.
        ok = cudaOk(cudaGetLastError(), "addKernel launch failed!");

        // Kernels run asynchronously: a fault inside the kernel (for example
        // an illegal memory access) is only reported by a later synchronizing
        // call, so synchronize here to attribute it to this launch.
        if (ok) {
            ok = cudaOk(cudaDeviceSynchronize(), "addKernel execution failed!");
        }
    }

    // Always attempt all three frees so one failure does not leak the others.
    // After a kernel fault these calls may themselves report the same error.
    const bool freedA = cudaOk(cudaFree(a), "cudaFree failed!");
    const bool freedB = cudaOk(cudaFree(b), "cudaFree failed!");
    const bool freedC = cudaOk(cudaFree(c), "cudaFree failed!");

    return (ok && freedA && freedB && freedC) ? 0 : 1;
}
It causes the program to crash without any useful call-stack information.
I have tried NVIDIA Nsight Graphics and Nsight CUDA debugging (the Visual Studio plugin), but found nothing.
Can anyone help?