undeclared identifier using CUDA

atortosa · March 19, 2009, 2:39pm

Hello everybody,

I am relatively new to CUDA. I wrote a kernel that I am including in one of my C++ projects.
I think the kernel is well implemented, however when I try to compile the project, I get the following error message:

error C2065: ‘threadIdx’ : undeclared identifier
error C2228: left of ‘.x’ must have class/struct/union

Can anyone help me solve this problem, please ???

Thank you very much in advance.
Greetings.

pacard · March 19, 2009, 2:44pm

did you include the cuda.h file?
did you write your cuda code in .cu file?
did you use nvcc to compile the .cu file?

atortosa · March 19, 2009, 2:49pm

did you include the cuda.h file?

yes, I include this header.

did you write your cuda code in .cu file?

Yes, I wrote my kernel in a .cu file (matvecmult_kernel.cu)

did you use nvcc to compile the .cu file?

I am not sure about that … I include a new Build Rule which identifies the
.cu file, and I think that in this rule, the command argument nvcc … is
already specified.

atortosa · March 19, 2009, 3:24pm

Please, can someone help me?

Paul_Russell · March 19, 2009, 5:09pm

You might need to post your code.

jph4599 · March 19, 2009, 5:22pm

If you’re using visual studio, you should check out the CUDA VS Wizard. This should set the building environment up for you.

baarts · March 19, 2009, 5:57pm

since it’s cl that’s complaining I’m pretty sure you’re using threadIdx in host code.

atortosa · March 20, 2009, 10:09am

These are the host and device code that I am using:

HOST CODE (matvecmult.cu):

======
// HOST CODE 17/03/2008
// Sparse Matrix-Vector multiplication.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <cutil_inline.h>
#include “matvecmult_kernel.cu”

// Multiplicacion matriz por vector A * x = res
void Mult(const double* gval_, const int* gcolind_, const int* growptr_, const int nrow, const int ncol,
const int nnzero, const double* gx_, double* res_)
{

int size_a, size_a2, size_b, size_b2, size_c;

// Load SparseMatrix “A” and vectors “x” and “res” to the device
// Note: Subscript _d identifies a device variable !!

double* gval_d ;
int* growptr_d ;
int* gcolind_d ;
int* ndim_d;
double* gx_d ;
double* res_d;

int ndim_[3] ;
ndim_[0] = nrow ; ndim_[1] = ncol ; ndim_[2] = nnzero ;

size_a = nnzero * sizeof(double); // non-zero elements size
size_a2 = nnzero * sizeof(int); // non-zero elements size
size_b = (nrow+1) * sizeof(double); // row size
size_b2 = nrow * sizeof(int); // row size
size_c = 3*sizeof(int); // ndim size

// Memory allocation into the device
cudaMalloc((void**)&gval_d, size_a);
cudaMalloc((void**)&gcolind_d, size_a2);
cudaMalloc((void**)&growptr_d, size_b2);
cudaMalloc((void**)&ndim_d, size_c);
cudaMalloc((void**)&gx_d, size_b);
cudaMalloc((void**)&res_d, size_b);

// Copy host to device variables
cudaMemcpy(gval_d, gval_, size_a, cudaMemcpyHostToDevice);
cudaMemcpy(gcolind_d, gcolind_, size_a2, cudaMemcpyHostToDevice);
cudaMemcpy(growptr_d, growptr_, size_b2, cudaMemcpyHostToDevice);
cudaMemcpy(ndim_d, ndim_, size_c, cudaMemcpyHostToDevice);
cudaMemcpy(gx_d, gx_, size_b, cudaMemcpyHostToDevice);
cudaMemcpy(res_d, res_, size_b, cudaMemcpyHostToDevice);

// La organizacion del grid en terminos de los bloques y threads.
// Primera version muy simple para el caso de una matriz 10x10.

//if (BLOCK_SIZE/ndim_(1) >= 512){
// cout << “Quietor!!! Demasiado Thread para tan poco bloque …” << endl;
// system(“pause”);
//}

dim3 dimBlock(BLOCK_SIZE);
dim3 dimGrid(ndim_[1]/BLOCK_SIZE);

// Launch the device computation
Mult_d<<< dimGrid, dimBlock >>>(gval_d, gcolind_d, growptr_d, ndim_d, gx_d, res_d);

// Read result from the device
cudaMemcpy(res_, res_d, size_b, cudaMemcpyDeviceToHost);

// Free device memory
cudaFree(gval_d);
cudaFree(gcolind_d);
cudaFree(growptr_d);
cudaFree(ndim_d);

}

==

DEVICE CODE

==

// DEVICE CODE
// Sparse Matrix-Vector multiplication.

#ifndef MATVECMULT_KERNEL_H
#define MATVECMULT_KERNEL_H

#include <stdio.h>

#include “matvecmult.h”

#endif

// Sparse matrix multiplication A * x = res !!
global void Mult_d(double* gval_, int* gcolind_, int* growptr_, int* ndim_, double* gx_, double* res_){

// Thread index
int tx = threadIdx.x ;

int k_b, k_e, ij_aux ;
double daux = 0;

k_b = growptr_[tx] ;
k_e = growptr_[tx+1];

while (k_b<k_e) {
ij_aux = gcolind_[k_b] ;
daux += gval_[k_b]*gx_[ij_aux];
k_b++;
}

res_[tx] = daux;
}

atortosa · March 20, 2009, 10:10am