"setting the device when a process is active is not allowed"

Hi guys!

I’m getting the error in the title when I call cudaSetDevice(i) after I have called my CUDA DLL function from C#.

From what I have read, I probably still have some CUDA device memory that has not been freed, but that’s not likely because I use only 2 device arrays and I free them every time. Did I do something wrong? Could anything else cause my problem?

Shortened CUDA DLL function code:

int *a is my input array, containing numbers in the range 0-255

int *eredmenytomb is my calculated (result) array, containing numbers in the range 0-12750

[codebox]stuktura_tipus* DLLfun2(int *a, int *eredmenytomb, int dimX, int dimY, int filter, int offset)

{

/*
 * Copies dimX*dimY ints from the host array `a` to the device, launches
 * reverseArrayBlock on them, and copies dimX*dimY ints of kernel output
 * back into the host array `eredmenytomb`.
 *
 * Returns a malloc'ed stuktura_tipus whose cuda_err field is set to the
 * string returned by the last checkCUDAError call made in this function.
 * Nothing in this function frees the struct; presumably the caller owns
 * it -- TODO confirm.
 *
 * `filter` is not referenced anywhere in this function body.
 */

// status string returned by checkCUDAError (helper defined elsewhere)
char *error;

stuktura_tipus *my_stuktura;

my_stuktura = (stuktura_tipus*) malloc(sizeof(stuktura_tipus)); 

// total number of elements processed
int dimA = dimX*dimY; 

//my_stuktura->eredmenykeptomb = (int*) malloc(sizeof(int)*dimA);

// define grid and block size

    int numThreadsPerBlock = 16;

int n_blocks = dimA/numThreadsPerBlock + (dimA%numThreadsPerBlock == 0?0:1);  // rounded up so a partial last block is covered (author's note: 1024 blocks)

// pointers for device memory: d_a receives the input, d_b is passed to the kernel as its first argument and read back as the result

int *d_a, *d_b;

// allocate device memory (two buffers of dimA ints each)

   size_t memSize = dimA * sizeof(int);

// NOTE(review): the cudaMalloc return values are not checked here; presumably
// a failure is picked up by the checkCUDAError call below -- confirm.
cudaMalloc( (void **) &d_a, memSize );

cudaMalloc( (void **) &d_b, memSize );

// Copy host array to device array

    cudaMemcpy( d_a, a, memSize, cudaMemcpyHostToDevice );

error = checkCUDAError(" memcpy_todevice ");

// NOTE(review): this compares pointer values, not string contents. It only
// behaves as intended if checkCUDAError returns a pointer that compares equal
// to this literal -- verify (strcmp would compare the contents).
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}

// launch kernel

reverseArrayBlock<<< n_blocks, numThreadsPerBlock >>>( d_b , d_a , dimA, dimX, dimY, offset);

// block until the device has completed

    cudaThreadSynchronize();

// check if kernel execution generated an error

error = checkCUDAError(" kernel invocation ");

// same pointer comparison as above
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}

// device to host copy

cudaMemcpy( eredmenytomb, d_b, memSize, cudaMemcpyDeviceToHost );



// Check for any CUDA errors

error = checkCUDAError(" memcpy_backtoHost ");

// same pointer comparison as above
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}



// free device memory

cudaFree(d_a);

cudaFree(d_b);

// NOTE(review): per the replies in this thread, the CUDA calls above leave a
// device associated with the process, so a later cudaSetDevice fails;
// calling cudaThreadExit() before returning was reported to resolve it.

// on this path `error` is the result of the memcpy_backtoHost check
my_stuktura->cuda_err = error;

return my_stuktura;

};[/codebox]

Hi guys!

I’m getting the error in the title when I call cudaSetDevice(i) after I have called my CUDA DLL function from C#.

From what I have read, I probably still have some CUDA device memory that has not been freed, but that’s not likely because I use only 2 device arrays and I free them every time. Did I do something wrong? Could anything else cause my problem?

Shortened CUDA DLL function code:

int *a is my input array, containing numbers in the range 0-255

int *eredmenytomb is my calculated (result) array, containing numbers in the range 0-12750

[codebox]stuktura_tipus* DLLfun2(int *a, int *eredmenytomb, int dimX, int dimY, int filter, int offset)

{

/*
 * Copies dimX*dimY ints from the host array `a` to the device, launches
 * reverseArrayBlock on them, and copies dimX*dimY ints of kernel output
 * back into the host array `eredmenytomb`.
 *
 * Returns a malloc'ed stuktura_tipus whose cuda_err field is set to the
 * string returned by the last checkCUDAError call made in this function.
 * Nothing in this function frees the struct; presumably the caller owns
 * it -- TODO confirm.
 *
 * `filter` is not referenced anywhere in this function body.
 */

// status string returned by checkCUDAError (helper defined elsewhere)
char *error;

stuktura_tipus *my_stuktura;

my_stuktura = (stuktura_tipus*) malloc(sizeof(stuktura_tipus)); 

// total number of elements processed
int dimA = dimX*dimY; 

//my_stuktura->eredmenykeptomb = (int*) malloc(sizeof(int)*dimA);

// define grid and block size

    int numThreadsPerBlock = 16;

int n_blocks = dimA/numThreadsPerBlock + (dimA%numThreadsPerBlock == 0?0:1);  // rounded up so a partial last block is covered (author's note: 1024 blocks)

// pointers for device memory: d_a receives the input, d_b is passed to the kernel as its first argument and read back as the result

int *d_a, *d_b;

// allocate device memory (two buffers of dimA ints each)

   size_t memSize = dimA * sizeof(int);

// NOTE(review): the cudaMalloc return values are not checked here; presumably
// a failure is picked up by the checkCUDAError call below -- confirm.
cudaMalloc( (void **) &d_a, memSize );

cudaMalloc( (void **) &d_b, memSize );

// Copy host array to device array

    cudaMemcpy( d_a, a, memSize, cudaMemcpyHostToDevice );

error = checkCUDAError(" memcpy_todevice ");

// NOTE(review): this compares pointer values, not string contents. It only
// behaves as intended if checkCUDAError returns a pointer that compares equal
// to this literal -- verify (strcmp would compare the contents).
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}

// launch kernel

reverseArrayBlock<<< n_blocks, numThreadsPerBlock >>>( d_b , d_a , dimA, dimX, dimY, offset);

// block until the device has completed

    cudaThreadSynchronize();

// check if kernel execution generated an error

error = checkCUDAError(" kernel invocation ");

// same pointer comparison as above
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}

// device to host copy

cudaMemcpy( eredmenytomb, d_b, memSize, cudaMemcpyDeviceToHost );



// Check for any CUDA errors

error = checkCUDAError(" memcpy_backtoHost ");

// same pointer comparison as above
if (error != "noerr"){

	my_stuktura->cuda_err = error;

	// free device memory on error

	cudaFree(d_a);

	cudaFree(d_b);

	return my_stuktura;

}



// free device memory

cudaFree(d_a);

cudaFree(d_b);

// NOTE(review): per the replies in this thread, the CUDA calls above leave a
// device associated with the process, so a later cudaSetDevice fails;
// calling cudaThreadExit() before returning was reported to resolve it.

// on this path `error` is the result of the memcpy_backtoHost check
my_stuktura->cuda_err = error;

return my_stuktura;

};[/codebox]

Hi

You cannot call cudaSetDevice while a device is still associated with your process. A device is implicitly associated with your process as soon as you call any CUDA function (except for the device management functions themselves). I think you can release the device again with cudaThreadExit() (which will also free all memory on the device), but I am not too sure about that.

Ceearem

Hi

You cannot call cudaSetDevice while a device is still associated with your process. A device is implicitly associated with your process as soon as you call any CUDA function (except for the device management functions themselves). I think you can release the device again with cudaThreadExit() (which will also free all memory on the device), but I am not too sure about that.

Ceearem

It worked… I called cudaThreadExit() at the end of my function and there was no longer any problem with cudaSetDevice.

THX External Media

It worked… I called cudaThreadExit() at the end of my function and there was no longer any problem with cudaSetDevice.

THX External Media