cudaErrorInvalidDeviceFunction Simple program throwing cudaErrorInvalidDeviceFunction error

vinaybabug · April 21, 2010, 10:33pm

Hi,

I just started getting my hands dirty with CUDA programming. I am using a GTX 285 on Windows Server 2003, with VS2005 as the IDE.

I am trying to execute the code below on the device, and it fails with cudaErrorInvalidDeviceFunction in both Debug and Release mode; in emulation mode, however, it works fine. I am not sure where to start troubleshooting, so any pointers would be helpful :)

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cuda.h>

#define EMU_RELEASE 0
// Kernel: adds 1 to each of the first N elements of a (defined below).
// The __global__ qualifier is required for a function launched with <<<...>>>.
__global__ void incrementArrayOnDevice(float *a, int N);
// Prints the status of a CUDA runtime call and terminates the program on error.
void checkError(cudaError_t mem_error);

/*
 * Host driver: zero-fills a small host array, copies it to the device,
 * launches incrementArrayOnDevice over it, copies the result back and
 * prints the values before and after the kernel ran.
 *
 * Every CUDA runtime call is routed through checkError(), which terminates
 * the program on failure. Host and device buffers are released before exit.
 */
int main(int argc, char** argv)
{
    const int N = 10;               // number of elements processed
    const int BLOCK_SIZE = 4;       // threads per block
    const size_t bytes = sizeof(float) * N;
    float *a_h = NULL, *b_h = NULL; // host input / host output
    float *a_d = NULL;              // device buffer
    int count = 0;
    int nBlocks = 0;
    cudaError_t mem_error;

    // allocate memory on the host
    a_h = (float *)malloc(bytes);
    b_h = (float *)malloc(bytes);
    if (a_h == NULL || b_h == NULL)
    {
        fprintf(stderr, "\nHost memory allocation failed.\n");
        free(a_h);   // free(NULL) is a no-op
        free(b_h);
        return EXIT_FAILURE;
    }

    for (count = 0; count < N; count++)
    {
        a_h[count] = 0;
        b_h[count] = 0;
    }

    for (count = 0; count < N; count++)
    {
        printf(" %lf", b_h[count]);
    }

    // allocate memory on the device
    mem_error = cudaMalloc((void **) &a_d, bytes);
    checkError(mem_error);

    // copy values of the array to device for calculations
    mem_error = cudaMemcpy(a_d, a_h, bytes, cudaMemcpyHostToDevice);
    checkError(mem_error);

    // ceil-div: enough blocks to cover N even when N is not a multiple of BLOCK_SIZE
    nBlocks = (N + BLOCK_SIZE - 1) / BLOCK_SIZE;

    incrementArrayOnDevice<<<nBlocks, BLOCK_SIZE>>>(a_d, N);
    // a kernel launch returns nothing; launch errors are read back here
    mem_error = cudaGetLastError();
    checkError(mem_error);

    // copy the result back to the host and check the status of the copy
    mem_error = cudaMemcpy(b_h, a_d, bytes, cudaMemcpyDeviceToHost);
    checkError(mem_error);

    printf("\n After copy...");
    for (count = 0; count < N; count++)
    {
        printf(" %lf", b_h[count]);
    }

    // release device and host buffers (previously leaked)
    mem_error = cudaFree(a_d);
    if (mem_error != cudaSuccess)
    {
        fprintf(stderr, "\nWarning: cudaFree failed: %s\n", cudaGetErrorString(mem_error));
    }
    free(a_h);
    free(b_h);

    system("pause");
    return 0;
}

/*
 * Kernel: adds 1 to each of the first N elements of a.
 *
 * Indexing is one-dimensional: each thread computes a flat index from
 * blockIdx.x, blockDim.x and threadIdx.x and handles at most one element.
 * Threads whose index is >= N do nothing, so the launch may contain more
 * threads than elements.
 *
 * a  array updated in place; it is dereferenced in device code
 * N  number of elements of a to update
 */
__global__ void incrementArrayOnDevice(float *a, int N)
{
    int threadID = blockIdx.x * blockDim.x + threadIdx.x;
    if (threadID < N)
    {
        a[threadID] += 1.0f;   // float literal avoids an int-to-float conversion
        //printf("\n%f", a[threadID]);
    }
}

/*
 * Returns the program's own message for a CUDA error code, or NULL when the
 * code is not in the table. The table is scanned in order, so the first
 * matching entry wins.
 */
static const char *lookupErrorMessage(cudaError_t code)
{
    static const struct
    {
        cudaError_t code;
        const char *text;
    } kMessages[] = {
        { cudaErrorMissingConfiguration,     "Missing configuration error." },
        { cudaErrorMemoryAllocation,         "Memory allocation error." },
        { cudaErrorInitializationError,      "Initialization error." },
        { cudaErrorLaunchFailure,            "Launch failure." },
        { cudaErrorPriorLaunchFailure,       "Prior launch failure." },
        { cudaErrorLaunchTimeout,            "Launch timeout error." },
        { cudaErrorLaunchOutOfResources,     "Launch out of resources error." },
        { cudaErrorInvalidDeviceFunction,    "Invalid device function." },
        { cudaErrorInvalidConfiguration,     "Invalid configuration." },
        { cudaErrorInvalidDevice,            "Invalid device." },
        { cudaErrorInvalidValue,             "Invalid value." },
        { cudaErrorInvalidPitchValue,        "Invalid pitch value." },
        { cudaErrorInvalidSymbol,            "Invalid symbol." },
        { cudaErrorMapBufferObjectFailed,    "Map buffer object failed." },
        { cudaErrorUnmapBufferObjectFailed,  "Unmap buffer object failed." },
        { cudaErrorInvalidHostPointer,       "Invalid host pointer." },
        { cudaErrorInvalidDevicePointer,     "Invalid device pointer." },
        { cudaErrorInvalidTexture,           "Invalid texture." },
        { cudaErrorInvalidTextureBinding,    "Invalid texture binding." },
        { cudaErrorInvalidChannelDescriptor, "Invalid channel descriptor." },
        { cudaErrorInvalidMemcpyDirection,   "Invalid memcpy direction." },
        { cudaErrorAddressOfConstant,        "Address of constant error." },
        { cudaErrorTextureFetchFailed,       "Texture fetch failed." },
        { cudaErrorTextureNotBound,          "Texture not bound error." },
        { cudaErrorSynchronizationError,     "Synchronization error." },
        { cudaErrorInvalidFilterSetting,     "Invalid filter setting." },
        { cudaErrorInvalidNormSetting,       "Invalid norm setting." },
        { cudaErrorMixedDeviceExecution,     "Mixed device execution." },
        { cudaErrorCudartUnloading,          "CUDA runtime unloading." },
        { cudaErrorUnknown,                  "Unknown error condition." },
        { cudaErrorNotYetImplemented,        "Function not yet implemented." },
        { cudaErrorMemoryValueTooLarge,      "Memory value too large." },
        { cudaErrorInvalidResourceHandle,    "Invalid resource handle." },
        { cudaErrorNotReady,                 "Not ready error." },
        { cudaErrorInsufficientDriver,       "CUDA runtime is newer than driver." },
        { cudaErrorSetOnActiveProcess,       "Set on active process error." },
        { cudaErrorNoDevice,                 "No available CUDA device." },
        { cudaErrorStartupFailure,           "Startup failure." },
        { cudaErrorApiFailureBase,           "API failure base." }
    };
    const size_t entries = sizeof(kMessages) / sizeof(kMessages[0]);

    for (size_t i = 0; i < entries; ++i)
    {
        if (kMessages[i].code == code)
        {
            return kMessages[i].text;
        }
    }
    return NULL;
}

/*
 * Reports the status of a CUDA runtime call.
 *
 * On cudaSuccess it prints "No errors." and returns. On any other code it
 * prints a description, waits for a key press via system("pause"), and
 * terminates the process with a failure exit status.
 *
 * Codes missing from the message table are no longer ignored: they are
 * reported using the runtime's own description and also terminate the
 * program, so a failed call can never be mistaken for success.
 *
 * mem_error  status returned by a CUDA runtime call or cudaGetLastError()
 */
void checkError(cudaError_t mem_error)
{
    if (mem_error == cudaSuccess)
    {
        printf("No errors.");
        return;
    }

    const char *text = lookupErrorMessage(mem_error);
    if (text != NULL)
    {
        printf("\n%s\n", text);
    }
    else
    {
        // fall back to the runtime's description for codes not listed above
        printf("\nUnrecognized CUDA error %d: %s\n", (int)mem_error, cudaGetErrorString(mem_error));
    }

    system("pause");
    // non-zero status so scripts and callers can detect the failure
    exit(EXIT_FAILURE);
}

Cheers

vinaybabug · April 24, 2010, 6:03pm

Sorry, the problem was not with the code but with the nvcc command-line arguments. All I had to do was use the correct project-creation wizard (I had installed two CUDA project templates: I believe one comes with the CUDA SDK, and the other, from the forum, is not very reliable) and set the appropriate library path for the project, as described in the CUDA docs.

Topic		Replies	Views
cudaErrorInvalidDeviceFunction : what's the cause ? CUDA Programming and Performance	0	1644	January 19, 2012
CUDA Error: Invalid Device Function Debugging CUDA errors CUDA Programming and Performance	3	5812	July 29, 2009
execution happens but still invalid device fuction CUDA Programming and Performance	0	1125	August 18, 2009
invalid device function error: cudaErrorInvalidDeviceFunction CUDA Programming and Performance	6	8890	March 12, 2010
invalid device function CUDA Programming and Performance	2	3812	July 8, 2009
Invalid device function CUDA Programming and Performance	10	6565	November 19, 2008
invalid device function, all CUDA-capable devices are busy or unavailable CUDA Programming and Performance	5	7823	July 6, 2013
invalid device function runtime error occurs only when running on actual device CUDA Programming and Performance	0	2066	December 4, 2009
Error when using cudaLaunch cudaErrorInvalidDeviceFunction error CUDA Programming and Performance	1	4580	July 1, 2009
invalid device function CUDA Setup and Installation	5	5965	November 16, 2016

cudaErrorInvalidDeviceFunction Simple program throwing cudaErrorInvalidDeviceFunction error

Related topics