Compiling Cuda code

Hello…
Could anyone help me to compile and run this program (MatrixAdd.cu) successfully…!

#include<stdio.h>
#include<stdlib.h>

const int N = 1024;
const int blocksize = 16;

_global_ void add_matrix(float* a, float* b, float* c, int N)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
int index = i + j*N;
if ( i < N && j < N)
c[index]= a[index] + b[index];
}

int main()
{
float *a = new float[N*N];
float *b = new float[N*N];
float *c = new float[N*N];

for ( int i=0; i< N*N; ++i)
{
a[i] = 1.0f;
b[i]= 3.5f;
}

float *ad, *bd, *cd;
const int size = N*N*sizeof(float);

cudaMalloc((void**)&ad, size);
cudaMalloc((void**)&bd, size);
cudaMalloc((void**)&cd, size);

cudaMemcpy(ad, a, size, cudaMemcpyHostToDevice)
cudaMemcpy(bd, b, size, cudaMemcpyHostToDevice)

dim3 dimBlock(blocksize, blocksize);
dim3 dimGrid(N/dimBlock.x, N/dimBlock.y);
add_matrix<<<dimGrid, dimBlock>>>(ad, bd, cd, N);

cudaMemcpy(c, cd, size, cudaMemcpyDeviceToHost);

cudaFree(ad);
cudaFree(bd);
cudaFree(cd);

delete a;
delete b;
delete c;

return EXIT_SUCCESS;

}

AND I HAVE GOT THE FOLLOWING ERRORS:-

MatrixAdd.cu(9): error: this declaration has no storage class or type specifier

MatrixAdd.cu(9): error: expected a “;”

MatrixAdd.cu(46): warning: parsing restarts here after previous syntax error

MatrixAdd.cu(47): error: identifier “dimBlock” is undefined

MatrixAdd.cu(48): error: this declaration has no storage class or type specifier

MatrixAdd.cu(48): error: expected a “;”

MatrixAdd.cu(50): error: this declaration has no storage class or type specifier

MatrixAdd.cu(50): error: declaration is incompatible with “cudaError_t cudaMemcpy(void *, const void *, size_t, cudaMemcpyKind)”
/usr/local/cuda/bin/../include/cuda_runtime_api.h(120): here

MatrixAdd.cu(50): error: identifier “c” is undefined

MatrixAdd.cu(50): error: expected a “)”

MatrixAdd.cu(52): error: this declaration has no storage class or type specifier

MatrixAdd.cu(52): error: declaration is incompatible with “cudaError_t cudaFree(void *)”
/usr/local/cuda/bin/../include/cuda_runtime_api.h(106): here

MatrixAdd.cu(52): error: identifier “ad” is undefined

MatrixAdd.cu(53): error: this declaration has no storage class or type specifier

MatrixAdd.cu(53): error: variable “cudaFree” has already been defined

MatrixAdd.cu(53): error: identifier “bd” is undefined

MatrixAdd.cu(54): error: this declaration has no storage class or type specifier

MatrixAdd.cu(54): error: variable “cudaFree” has already been defined

MatrixAdd.cu(54): error: identifier “cd” is undefined

MatrixAdd.cu(56): error: expected a declaration

MatrixAdd.cu(57): error: expected a declaration

MatrixAdd.cu(58): error: expected a declaration

MatrixAdd.cu(61): error: expected a declaration

MatrixAdd.cu(63): error: expected a declaration

MatrixAdd.cu(7): warning: variable “blocksize” was declared but never referenced

23 errors detected in the compilation of “/tmp/tmpxft_000049d3_00000000-4_MatrixAdd.cpp1.ii”.

Thank you for help!

Try including cuda.h, and maybe reading some introductory C or C++ programming material. If you are having trouble compiling what amounts to a 20 or 30 line long CUDA “hello world” equivalent, I think you are going to struggle with serious CUDA programming until you have at least an elementary grasp of C/C++ and the toolchain.

I get the same type of error

[codebox]

fdtd1d_yee_mult_lnx.cu(34): error: this declaration has no storage class or type specifier

fdtd1d_yee_mult_lnx.cu(34): error: expected a “;”

[/codebox]

But I feel that I have included the proper libraries:

[codebox]

#include <math.h>

#include <stdio.h>

#include "cuda/cuda.h"

//#include "idl_export.h" CHANGED

//CHANGED IDL_LONG TO int

#define PI 3.14159265358979323846264338327950288

#define LIMIT_UNDERFLOW 1.0e-6

#define SIZE_IDL 4 //Number of bytes in this data type.

#define SIZE_DOUBLE 8 //Number of bytes in double[/codebox]

and the function prototype looks like

[codebox]_global_ void for1(double *ExyzG,double *BxyzG,double *wxG);[/codebox]

Any suggestions?

__global__ has two underscores on both sides of the word global.

__global__ -> correct
_global_ -> wrong

N.

Doh! Thank you very much, I never would have noticed that! The compiler is giving me a new error now with this part of the code:

[codebox]

91: cudaMalloc((void**)&ExyzG, SIZE_DOUBLE);

92: cudaMalloc((void**)&BxyzG, SIZE_DOUBLE);

93: cudaMalloc((void**)&imxG, SIZE_IDL);

94: cudaMalloc((void**)&wxG, SIZE_DOUBLE);

95: cudaMemcpy(ExyzG, Exyz, SIZE_DOUBLE, cudaMemcpyHostToDevice);

[/codebox]

The error is:

[codebox]fdtd1d_yee_mult_lnx.cu(95): error: argument of type “double” is incompatible with parameter of type “void *”

[/codebox]

ExyzG and Exyz are both doubles, and it seems that I have allocated the correct amount for this, since SIZE_DOUBLE = 8.

cudaMemcpy requires pointers as arguments, not the values themselves. I believe ExyzG is a pointer, but Exyz isn’t. Try

cudaMemcpy(ExyzG, &Exyz, SIZE_DOUBLE, cudaMemcpyHostToDevice);

N.

The if should be: if (( i < N ) && ( j < N ))

You’re missing semicolons after the cudaMemcpy

But yeah, get used to programming in C before you tackle CUDA: most of those error messages are very common and suggest bad syntax or a missing semicolon.

(doublepost edited away)

There is a very old bit of netiquette - goes back to when NNTP was the favoured way of exchanging info between people.

It says, more or less: if you have nothing useful to say, say nothing.

Your answer does nothing to inform susan7 of her error, but merely points out that she has made an error, and does so in an insulting manner.

If you are good enough to see her errors, please be good enough to explain them - and do try to do so in a respectful manner.

Hello,

First of all you need to include the cuda header,

#include <cuda.h>

Also, CUDA function qualifiers are written with two underscores on each side, not one, so your _global_ becomes __global__.

and you forgot to put semicolons after the cudaMemcpy.

I changed that and managed to compile the code, but I have not executed it; that is your homework :P

[codebox]#include<stdio.h>

#include<stdlib.h>

#include <cuda.h>

// Matrix edge length; the host loop below fills N*N elements per array.
const int N = 1024;

// Thread-block edge length; main() builds its dim3 block as (blocksize, blocksize).
const int blocksize = 16;

/**
 * Element-wise addition of two square matrices: c = a + b.
 *
 * Each thread handles at most one element. The x index (i) selects the
 * column and the y index (j) selects the row, so the flat offset is
 * i + j*N. Threads whose (i, j) falls outside the N x N matrix do nothing,
 * which makes the kernel safe for grids that overshoot the matrix.
 *
 * @param a  first input matrix, N*N floats, dereferenced on the device
 * @param b  second input matrix, N*N floats, dereferenced on the device
 * @param c  output matrix, N*N floats, written on the device
 * @param N  edge length of the matrices (shadows the file-level constant N)
 */
__global__ void add_matrix(float* a, float* b, float* c, int N)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard first, then index: out-of-range threads never form an offset.
    if (i < N && j < N) {
        int index = i + j * N;
        c[index] = a[index] + b[index];
    }
}

/* Print a readable message and terminate if a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Host driver: fills two N x N matrices, adds them on the GPU with
 * add_matrix, copies the result back and releases all resources.
 *
 * Returns EXIT_SUCCESS when every CUDA call succeeded; any CUDA failure
 * terminates the process with EXIT_FAILURE via checkCuda().
 */
int main()
{
    const size_t count = (size_t)N * N;         // elements per matrix
    const size_t size  = count * sizeof(float); // bytes per matrix

    // Host buffers.
    float* a = new float[count];
    float* b = new float[count];
    float* c = new float[count];

    for (size_t i = 0; i < count; ++i) {
        a[i] = 1.0f;
        b[i] = 3.5f;
    }

    // Device buffers.
    float *ad, *bd, *cd;
    checkCuda(cudaMalloc((void**)&ad, size), "cudaMalloc(ad)");
    checkCuda(cudaMalloc((void**)&bd, size), "cudaMalloc(bd)");
    checkCuda(cudaMalloc((void**)&cd, size), "cudaMalloc(cd)");

    checkCuda(cudaMemcpy(ad, a, size, cudaMemcpyHostToDevice), "copy a to device");
    checkCuda(cudaMemcpy(bd, b, size, cudaMemcpyHostToDevice), "copy b to device");

    // Round the grid up so the whole matrix is covered even when N is not a
    // multiple of blocksize; the kernel's bounds check discards the excess.
    dim3 dimBlock(blocksize, blocksize);
    dim3 dimGrid((N + dimBlock.x - 1) / dimBlock.x,
                 (N + dimBlock.y - 1) / dimBlock.y);
    add_matrix<<<dimGrid, dimBlock>>>(ad, bd, cd, N);
    // A launch returns no status itself; query it explicitly.
    checkCuda(cudaGetLastError(), "add_matrix launch");

    // The blocking copy also surfaces errors raised while the kernel ran.
    checkCuda(cudaMemcpy(c, cd, size, cudaMemcpyDeviceToHost), "copy c to host");

    checkCuda(cudaFree(ad), "cudaFree(ad)");
    checkCuda(cudaFree(bd), "cudaFree(bd)");
    checkCuda(cudaFree(cd), "cudaFree(cd)");

    // Arrays allocated with new[] must be released with delete[].
    delete[] a;
    delete[] b;
    delete[] c;

    return EXIT_SUCCESS;
}

[/codebox]

Susan7:

I suspect you are a Python programmer. Is this correct? If so maybe take a look at pyCUDA (http://mathema.tician.de/software/pycuda). I’m just a python tinkerer so I can’t say how good it is.