application crash and device memory

Is it possible to keep using CUDA after a kernel has failed (due to memory corruption or other causes)? Can the application reset the device,
or is restarting the application the only way? If the application crashes, how does CUDA release the memory on the device?

I wonder the same. This simple example shows that, unless something is done to recover, CUDA calls made after a crashed kernel do not work.

[codebox]

/*
 * Deliberately faulting kernel used to put the CUDA context into an error
 * state.
 *
 * Stores 23 to *arg, then stores 0 through a pointer built from INT_MAX
 * (the "crashing code"), and finally stores 1 to *arg.
 *
 * arg: pointer the kernel writes through.
 */
__global__ void testKernel1(int* arg)
{
    *arg = 23;

    /* crashing code: INT_MAX is presumably not a valid address for the kernel */
    int* x = (int*)INT_MAX;
    *x = 0;

    *arg = 1;
}

/*
 * Increments the int that arg points to by one.
 *
 * arg: pointer to the value to increment; the kernel reads and writes it
 *      without any thread indexing, so every launched thread touches the
 *      same location.
 */
__global__ void testKernel2(int* arg)
{
    (*arg)++;
}

/*
 * Host driver for the crash experiment.
 *
 * Launches testKernel1 (expected to crash), copies the int back and prints
 * it, then does the same with testKernel2 on the same device buffer. The
 * lines written to stdout are unchanged; in addition, the status of every
 * CUDA call is checked and any failure is reported on stderr, so the point
 * at which the runtime starts returning errors is visible.
 *
 * Returns 0 once both steps have run, or 1 if a buffer could not be
 * allocated.
 */
int main()
{
    int *d_arg = NULL, *h_arg = NULL, arg_size = sizeof(int);
    cudaError_t err;

    err = cudaMalloc(&d_arg, arg_size);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    /* Left uninitialized on purpose: if a copy below fails, the value printed
       is whatever the buffer already held, which is how the failure shows up
       on stdout. */
    h_arg = (int*)malloc(arg_size);
    if (h_arg == NULL) {
        fprintf(stderr, "malloc failed\n");
        (void)cudaFree(d_arg);
        return 1;
    }

    /* this kernel should crash */
    testKernel1<<<1,1>>>(d_arg);
    err = cudaGetLastError();   /* launch status; a kernel launch returns nothing itself */
    if (err != cudaSuccess)
        fprintf(stderr, "testKernel1 launch failed: %s\n", cudaGetErrorString(err));

    err = cudaMemcpy(h_arg, d_arg, arg_size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        fprintf(stderr, "cudaMemcpy after testKernel1 failed: %s\n", cudaGetErrorString(err));
    printf("h_arg = %d\n", *h_arg);

    testKernel2<<<1,1>>>(d_arg);
    err = cudaGetLastError();
    if (err != cudaSuccess)
        fprintf(stderr, "testKernel2 launch failed: %s\n", cudaGetErrorString(err));

    err = cudaMemcpy(h_arg, d_arg, arg_size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        fprintf(stderr, "cudaMemcpy after testKernel2 failed: %s\n", cudaGetErrorString(err));
    printf("h_arg = %d\n", *h_arg);

    /* Best-effort cleanup: the status of cudaFree is deliberately not
       reported, since it may fail as well once earlier calls have failed. */
    (void)cudaFree(d_arg);
    free(h_arg);

    return 0;
}

[/codebox]

Execution result of the example above:

h_arg = 9074256

h_arg = 9074256

…instead of the following if the faulty code is commented out:

h_arg = 1

h_arg = 2

I wonder the same. This simple example shows that, unless something is done to recover, CUDA calls made after a crashed kernel do not work.

[codebox]

/*
 * Deliberately faulting kernel used to put the CUDA context into an error
 * state.
 *
 * Stores 23 to *arg, then stores 0 through a pointer built from INT_MAX
 * (the "crashing code"), and finally stores 1 to *arg.
 *
 * arg: pointer the kernel writes through.
 */
__global__ void testKernel1(int* arg)
{
    *arg = 23;

    /* crashing code: INT_MAX is presumably not a valid address for the kernel */
    int* x = (int*)INT_MAX;
    *x = 0;

    *arg = 1;
}

/*
 * Increments the int that arg points to by one.
 *
 * arg: pointer to the value to increment; the kernel reads and writes it
 *      without any thread indexing, so every launched thread touches the
 *      same location.
 */
__global__ void testKernel2(int* arg)
{
    (*arg)++;
}

/*
 * Host driver for the crash experiment.
 *
 * Launches testKernel1 (expected to crash), copies the int back and prints
 * it, then does the same with testKernel2 on the same device buffer. The
 * lines written to stdout are unchanged; in addition, the status of every
 * CUDA call is checked and any failure is reported on stderr, so the point
 * at which the runtime starts returning errors is visible.
 *
 * Returns 0 once both steps have run, or 1 if a buffer could not be
 * allocated.
 */
int main()
{
    int *d_arg = NULL, *h_arg = NULL, arg_size = sizeof(int);
    cudaError_t err;

    err = cudaMalloc(&d_arg, arg_size);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    /* Left uninitialized on purpose: if a copy below fails, the value printed
       is whatever the buffer already held, which is how the failure shows up
       on stdout. */
    h_arg = (int*)malloc(arg_size);
    if (h_arg == NULL) {
        fprintf(stderr, "malloc failed\n");
        (void)cudaFree(d_arg);
        return 1;
    }

    /* this kernel should crash */
    testKernel1<<<1,1>>>(d_arg);
    err = cudaGetLastError();   /* launch status; a kernel launch returns nothing itself */
    if (err != cudaSuccess)
        fprintf(stderr, "testKernel1 launch failed: %s\n", cudaGetErrorString(err));

    err = cudaMemcpy(h_arg, d_arg, arg_size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        fprintf(stderr, "cudaMemcpy after testKernel1 failed: %s\n", cudaGetErrorString(err));
    printf("h_arg = %d\n", *h_arg);

    testKernel2<<<1,1>>>(d_arg);
    err = cudaGetLastError();
    if (err != cudaSuccess)
        fprintf(stderr, "testKernel2 launch failed: %s\n", cudaGetErrorString(err));

    err = cudaMemcpy(h_arg, d_arg, arg_size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        fprintf(stderr, "cudaMemcpy after testKernel2 failed: %s\n", cudaGetErrorString(err));
    printf("h_arg = %d\n", *h_arg);

    /* Best-effort cleanup: the status of cudaFree is deliberately not
       reported, since it may fail as well once earlier calls have failed. */
    (void)cudaFree(d_arg);
    free(h_arg);

    return 0;
}

[/codebox]

Execution result of the example above:

h_arg = 9074256

h_arg = 9074256

…instead of the following if the faulty code is commented out:

h_arg = 1

h_arg = 2

cudaThreadExit() is what you want to reset a context that has crashed. (In CUDA 4.0 and later, cudaThreadExit() is deprecated in favor of cudaDeviceReset().)

cudaThreadExit() is what you want to reset a context that has crashed. (In CUDA 4.0 and later, cudaThreadExit() is deprecated in favor of cudaDeviceReset().)