Hi
I’m new to CUDA and was trying out the simple vector addition code. However, I’m not able to compile my code. For the sake of re-usability, I’ve defined a class Vector
which basically holds the pointer to a dynamically allocated array and the length of the array (full code here). I’ve written my main function in a cpp file as follows:
#include <iostream>
using namespace std;
#include "Vector.cu"
#include "utils.cu"
#include "vecAdd.cu"
int main() {
// Length of an array
int len = 0;
std::cout <<"Enter the length of vector: ";
std::cin >> len;
Vector v1(len), v2(len), v3(len);
// Initialising vectors
rand_init(v1);
rand_init(v2);
val_init(v3,0);
// Moving to GPU
Vector d_v1 = toDevice(v1, true);
Vector d_v2 = toDevice(v2, true);
Vector d_v3 = toDevice(v3, true);
// GPU sum
addDevice(d_v1, d_v2, d_v3, 256);
// Moving back from GPU
Vector h_v3 = toHost(d_v3);
print(h_v3, "h_v3");
return 0;
}
I’ve tested all the parts where I copy data to device and it’s working as expected however I’m not able to execute the kernel on device.
addDevice
function is present in vecAdd.cu file and it’s contents are as follows:
#include <cuda.h>
#include <cuda_runtime.h>
using namespace std;
void addDevice(Vector v1, Vector v2, Vector v3, int numThrds)
{
assert (v1.device == true && v2.device == true && v3.device == true);
assert (v1.len == v2.len);
assert (v1.len == v3.len);
addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
}
__global__ void addKernel(float* v1, float* v2, float* v3, int len)
{
int idx = blockDim.x * blockIdx.x + threadIdx.x;
if (idx < len)
v3[idx] = v1[idx] + v2[idx];
}
When I compile main.cpp, I get the following errors:
In file included from main.cpp:6:
vecAdd.cu: In function ‘void addDevice(Vector, Vector, Vector, int)’:
vecAdd.cu:23:5: error: ‘addKernel’ was not declared in this scope
addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
^~~~~~~~~
vecAdd.cu:23:16: error: expected primary-expression before ‘<’ token
addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
^
vecAdd.cu:23:17: error: ‘ceil’ was not declared in this scope
addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
^~~~
vecAdd.cu:23:56: error: expected primary-expression before ‘>’ token
addKernel<<<ceil(v1.len/(float)numThrds),numThrds>>>(v1.ptr, v2.ptr, v3.ptr, v1.len);
^
vecAdd.cu: In function ‘void addKernel(float*, float*, float*, int)’:
vecAdd.cu:29:15: error: ‘blockDim’ was not declared in this scope
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~
vecAdd.cu:29:15: note: suggested alternative: ‘flockfile’
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~
flockfile
vecAdd.cu:29:28: error: ‘blockIdx’ was not declared in this scope
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~
vecAdd.cu:29:28: note: suggested alternative: ‘clock’
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~
clock
vecAdd.cu:29:41: error: ‘threadIdx’ was not declared in this scope
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~~
vecAdd.cu:29:41: note: suggested alternative: ‘pthread_t’
int idx = blockDim.x * blockIdx.x + threadIdx.x;
^~~~~~~~~
pthread_t
I don’t understand how I can resolve this issue. Hope somebody can help me.
Thanks