Hi All,
I am running the simple code below (dotProduct is a misnomer) on the default device of the GTX295 (not connected to a display).
I run 5 instances of this code in the background (./dotP & ./dotP & ./dotP & ./dotP & ./dotP &).
When I bring one of these processes to the foreground and press CTRL_C, the system hangs for a few seconds and 2 instances die. One of them reports a ULF (unspecified launch failure).
It looks like the driver is corrupting the context of another running instance.
I can reliably reproduce this case on my setup here. Sometimes it does not happen on the first CTRL_C, but it eventually happens before all the CTRL_Cs are exhausted (bringing the processes to the foreground (fg) one by one and pressing CTRL_C on each).
We fear that this driver behavior could exist even in the normal termination path (without pressing CTRL_C),
but we don’t have any solid evidence at the moment.
We request that NVIDIA look into this.
Many THANKS!
System info:
============
Ubuntu Lucid 10.04 x86_64
CUDA 3.2, Driver version: 260.19.26
nvcc -O2 -o dotP dotP.cu
GTX 295
#include <stdio.h>
/* Element count of each device buffer; also passed to the kernel as N. */
#define MAX_N (4*1024*1024)
/* Number of kernel launches queued by doDotProduct(). */
#define NUM_RUNS (10000)
/*
 * ERR_CHECK(cuda_fn): evaluates a CUDA runtime expression that yields a
 * cudaError_t. If the result is not cudaSuccess, prints the source line and
 * the runtime's error string, then terminates the process with exit(-1).
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves as a single
 * statement: `if (x) ERR_CHECK(f()); else ...` compiles and binds correctly.
 * Callers must terminate the invocation with a semicolon.
 * The argument is parenthesized and the local uses an unusual name so that it
 * cannot shadow a variable referenced inside the caller's expression.
 */
#define ERR_CHECK(cuda_fn) \
do {\
    cudaError_t err_check_status_ = (cuda_fn);\
    if (err_check_status_ != cudaSuccess) \
    {\
        printf("CUDA Error: Line %d : %s\n", __LINE__ , cudaGetErrorString(err_check_status_));\
        exit(-1);\
    }\
} while (0)
/*
 * Synthetic compute workload (despite the name, this is not a dot product).
 *
 * Each thread starts at its flat global index and advances by the total
 * number of launched threads (blockDim.x * gridDim.x) until it reaches N.
 * For every element i it visits, it reads a[i] and b[i], accumulates
 * sqrtf(a^2 + b^2 - a*b - b*a) 100 times, and stores the sum in c[i].
 *
 * Parameters:
 *   a, b : input arrays, each holding at least N floats
 *   c    : output array holding at least N floats
 *   N    : number of elements to process
 *
 * Expects a 1D grid of 1D blocks; only the .x components are used.
 *
 * Fix: the loop body previously indexed with the thread's starting index
 * instead of the loop variable, so every pass reprocessed the same element
 * and elements beyond the total thread count were never touched.
 */
__global__ void dotProductGPU(float *a, float *b, float *c, int N)
{
    const int stride = blockDim.x * gridDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += stride)
    {
        const float a1 = a[i];
        const float b1 = b[i];
        float sum = 0.0f;
        /* Repeat the same expression 100 times to keep the GPU busy. */
        for (int j = 0; j < 100; j++)
        {
            sum += sqrtf(a1*a1 + b1*b1 - a1*b1 - b1*a1);
        }
        c[i] = sum;
    }
}
/*
 * Allocates three device buffers of MAX_N floats, zero-fills the two inputs,
 * queues NUM_RUNS launches of dotProductGPU (1000 blocks of 96 threads each),
 * waits for all of them to finish, and frees the buffers.
 *
 * Returns 0 on success. Any CUDA error is reported by ERR_CHECK, which
 * terminates the process, so this function does not return on failure.
 */
int doDotProduct(void)
{
    const size_t bytes = MAX_N * sizeof(float);
    float *aGPU, *bGPU, *cGPU;

    printf("Doing Dot Product\n");

    ERR_CHECK(cudaMalloc(&aGPU, bytes));
    ERR_CHECK(cudaMalloc(&bGPU, bytes));
    ERR_CHECK(cudaMalloc(&cGPU, bytes));

    /* Give the kernel defined inputs; cudaMalloc does not initialize memory. */
    ERR_CHECK(cudaMemset(aGPU, 0, bytes));
    ERR_CHECK(cudaMemset(bGPU, 0, bytes));

    for (int i = 0; i < NUM_RUNS; i++)
    {
        dotProductGPU<<<1000, 96>>>(aGPU, bGPU, cGPU, MAX_N);
        /* A launch returns no status; fetch launch errors explicitly. */
        ERR_CHECK(cudaGetLastError());
    }

    /*
     * Errors raised while the kernels execute surface here.
     * cudaThreadSynchronize is kept because the stated toolchain is CUDA 3.2;
     * later toolkits provide cudaDeviceSynchronize as its replacement.
     */
    ERR_CHECK(cudaThreadSynchronize());

    ERR_CHECK(cudaFree(aGPU));
    ERR_CHECK(cudaFree(bGPU));
    ERR_CHECK(cudaFree(cGPU));
    return 0;
}
/*
 * Program entry point: runs the GPU workload once.
 * The workload's return value is intentionally discarded; the exit status
 * is always 0 when this function returns.
 */
int main(void)
{
    (void) doDotProduct();

    return 0;
}