Launching CUDA kernel with C++ main file

Hi

I’m new to CUDA and was trying out the simple vector addition code. However, I’m not able to compile my code. For the sake of re-usability, I’ve defined a class Vector which basically holds the pointer to a dynamically allocated array and the length of the array (full code here). I’ve written my main function in a cpp file as follows:

#include <iostream>
using namespace std;

#include "Vector.cu"
#include "utils.cu"
#include "vecAdd.cu"

int main() {
    // Length of an array
    int len = 0;
    std::cout <<"Enter the length of vector: ";
    std::cin >> len;

    Vector v1(len), v2(len), v3(len);

    // Initialising vectors
    rand_init(v1);
    rand_init(v2);
    val_init(v3,0);


    // Moving to GPU
    Vector d_v1 = toDevice(v1, true);
    Vector d_v2 = toDevice(v2, true);
    Vector d_v3 = toDevice(v3, true);

    // GPU sum
    addDevice(d_v1, d_v2, d_v3, 256);
    
    // Moving back from GPU
    Vector h_v3 = toHost(d_v3);

    print(h_v3, "h_v3");

    return 0;
}

I’ve tested all the parts where I copy data to device and it’s working as expected however I’m not able to execute the kernel on device.

addDevice function is present in vecAdd.cu file and it’s contents are as follows:

#include <cuda.h>
#include <cuda_runtime.h>

using namespace std;

void addDevice(Vector v1, Vector v2, Vector v3, int numThrds)
{
    assert (v1.device == true && v2.device == true && v3.device == true);
    assert (v1.len == v2.len);
    assert (v1.len == v3.len);

    addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
}


__global__ void addKernel(float* v1, float* v2, float* v3, int len)
{
    int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx < len)
        v3[idx] = v1[idx] + v2[idx];
}

When I compile main.cpp, I get the following errors:

In file included from main.cpp:6:
vecAdd.cu: In function ‘void addDevice(Vector, Vector, Vector, int)’:
vecAdd.cu:23:5: error: ‘addKernel’ was not declared in this scope
     addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
     ^~~~~~~~~
vecAdd.cu:23:16: error: expected primary-expression before ‘<’ token
     addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
                ^
vecAdd.cu:23:17: error: ‘ceil’ was not declared in this scope
     addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
                 ^~~~
vecAdd.cu:23:56: error: expected primary-expression before ‘>’ token
     addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
                                                        ^
vecAdd.cu: In function ‘void addKernel(float*, float*, float*, int)’:
vecAdd.cu:29:15: error: ‘blockDim’ was not declared in this scope
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
               ^~~~~~~~
vecAdd.cu:29:15: note: suggested alternative: ‘flockfile’
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
               ^~~~~~~~
               flockfile
vecAdd.cu:29:28: error: ‘blockIdx’ was not declared in this scope
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
                            ^~~~~~~~
vecAdd.cu:29:28: note: suggested alternative: ‘clock’
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
                            ^~~~~~~~
                            clock
vecAdd.cu:29:41: error: ‘threadIdx’ was not declared in this scope
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
                                         ^~~~~~~~~
vecAdd.cu:29:41: note: suggested alternative: ‘pthread_t’
     int idx = blockDim.x * blockIdx.x + threadIdx.x;
                                         ^~~~~~~~~
                                         pthread_t

I don’t understand how I can resolve this issue. Hope somebody can help me.

Thanks