Cuda Hello world Program executed but getting this error (cudaErrorNoKernelImageForDevice 209)

shalinsirwani · January 7, 2020, 8:16am

(GPU: Quadro K600.) The code is below:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

// Element-wise vector addition: each thread computes c[i] = a[i] + b[i],
// where i is the thread's threadIdx.x.
//
// Launch layout: only threadIdx.x is used as the index, so the kernel must be
// launched as a single 1-D block with one thread per element (addWithCuda
// launches it as <<<1, size>>>). There is no bounds check, so the block must
// not contain more threads than the arrays have elements.
//
// c: output array, written at index threadIdx.x.
// a, b: input arrays, read at index threadIdx.x.
// All three pointers are dereferenced inside the kernel, so they must refer to
// device-accessible memory.
__global__ void addKernel(int *c, const int *a, const int *b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}

// Program entry point: adds two fixed five-element vectors through
// addWithCuda, prints the element-wise sums, then resets the CUDA device.
// Returns 0 on success, or 1 if addWithCuda or cudaDeviceReset reports an error.
int main()
{
    const int kElementCount = 5;
    const int lhs[kElementCount] = { 1, 2, 3, 4, 5 };
    const int rhs[kElementCount] = { 10, 20, 30, 40, 50 };
    int sums[kElementCount] = { 0 };

    // Element-wise addition; addWithCuda writes the results into sums.
    if (addWithCuda(sums, lhs, rhs, kElementCount) != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        sums[0], sums[1], sums[2], sums[3], sums[4]);

    // Reset the device before exiting so that profiling and tracing tools
    // such as Nsight and Visual Profiler can show complete traces.
    if (cudaDeviceReset() != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    return 0;
}

// Prints "<what> failed: <CUDA error string>" to stderr when status is not
// cudaSuccess. Returns true on failure so the caller can branch on it directly.
static bool cudaCallFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return false;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return true;
}

// Helper function for using CUDA to add vectors in parallel.
//
// Copies `a` and `b` (host arrays of `size` ints) to device buffers, launches
// addKernel with one thread per element, and copies the result back into `c`.
//
// c:    host output array with room for `size` ints.
// a, b: host input arrays of `size` ints each.
// size: number of elements. The kernel is launched as a single block of
//       `size` threads, so a `size` larger than the device's per-block thread
//       limit makes the launch fail; that failure is reported and returned.
//       A `size` of 0 is treated as "nothing to do" and returns cudaSuccess
//       without touching the device (a zero-thread launch would otherwise be
//       rejected as an invalid configuration).
//
// Returns cudaSuccess, or the first CUDA error encountered. Every failure is
// also logged to stderr together with its CUDA error string. Device buffers
// are released on every path that allocated them.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // All locals are declared before the first goto so that no jump crosses
    // an initialization.
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus = cudaSuccess;
    const size_t bytes = size * sizeof(int);

    // Nothing to add; avoid launching a block with zero threads.
    if (size == 0) {
        return cudaSuccess;
    }

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed: %s. Do you have a CUDA-capable GPU installed?\n",
            cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaCallFailed(cudaStatus, "cudaMalloc(dev_c)")) {
        goto Cleanup;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, bytes);
    if (cudaCallFailed(cudaStatus, "cudaMalloc(dev_a)")) {
        goto Cleanup;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, bytes);
    if (cudaCallFailed(cudaStatus, "cudaMalloc(dev_b)")) {
        goto Cleanup;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaCallFailed(cudaStatus, "cudaMemcpy(a -> dev_a)")) {
        goto Cleanup;
    }

    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaCallFailed(cudaStatus, "cudaMemcpy(b -> dev_b)")) {
        goto Cleanup;
    }

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // A kernel launch returns no status itself; launch errors are picked up
    // here via cudaGetLastError.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d (%s) after launching addKernel!\n",
            cudaStatus, cudaGetErrorString(cudaStatus));
        goto Cleanup;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaCallFailed(cudaStatus, "cudaMemcpy(dev_c -> c)")) {
        goto Cleanup;
    }

Cleanup:
    // Reached on success and on failure. Buffers that were never allocated
    // are still 0 here.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}

I am running this code in Visual Studio 2019 with CUDA Toolkit 10.1. How can I resolve this?

mnicely · January 8, 2020, 8:19pm

Can you make sure you can run deviceQuery from the Utilities folder in the NVIDIA samples?

Robert_Crovella · January 9, 2020, 3:49am

"No kernel image for device" means you are compiling for the wrong architecture: the executable contains no device code that your GPU can run.

Your quadro K600 is a compute capability 3.0 device.

You want to select compute_30 and sm_30 for that device in your project settings (Project Properties → CUDA C/C++ → Device → Code Generation, e.g. `compute_30,sm_30`).

https://stackoverflow.com/questions/16832026/how-to-change-compute-and-sm-parameters-in-visual-studio-2010

https://stackoverflow.com/questions/14411435/how-to-set-cuda-compiler-flags-in-visual-studio-2010/14413360#14413360

shalinsirwani · January 9, 2020, 5:07am

This solves my problem (select compute_30 and sm_30 for that device, in your project settings).
Thank You Robert_Crovella(https://devtalk.nvidia.com/member/1836397/)

Topic		Replies	Views
addKernel launch failed: no kernel image is available CUDA Programming and Performance	3	3741	October 12, 2021
Need Help to get CUDA running with c++ CUDA Setup and Installation	1	563	June 25, 2019
Kernel Launch Failure Very simple kernel CUDA Programming and Performance	3	3968	September 14, 2011
Question for "CUDA" Jetson Nano	4	1154	October 18, 2021
addKernel launch failed: no kernel image is available Nsight Visual Studio Edition	0	400	June 30, 2021
cudaError 209 CUDA Programming and Performance cuda , kernel , ubuntu	2	518	December 6, 2024
Error: no kernel image is available for execution on the device while running CUDA Kernel DRIVE AGX Orin General driveos-cuda	2	999	October 10, 2023
Developer tools Error CUDA Programming and Performance cuda	4	475	July 26, 2023
no kernel image is available for execution on the device CUDA Setup and Installation	1	1989	February 14, 2020
No kernel image available for execution on the device Jetson Nano	3	1720	October 18, 2021

Cuda Hello world Program executed but getting this error (cudaErrorNoKernelImageForDevice 209)

Related topics