Hello.
I have a program that multiplies two vectors element by element on the CPU. I need to implement this program as a CUDA kernel. How can I do that?
Sorry, I'm new to CUDA.
I use a 64-bit computer.
Graphics card: NVIDIA GeForce GT 120
Thanks
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
struct timeval start,stop;
// *******************************************
/*
 * Element-wise product of two vectors on the CPU.
 *
 * For every index i in [0, length) stores a[i] * b[i] into result[i].
 * Nothing is written when length is zero or negative.
 */
void CPU_VectMul (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        float product = a[idx] * b[idx];
        result[idx] = product;
        ++idx;
    }
}
// *******************************************
/*
 * Checksum of a result vector.
 *
 * Adds result[0] .. result[length-1], in that order, into a double
 * accumulator and returns the total. Returns 0.0 when length <= 0.
 */
double CheckResults ( float *result, int length)
{
    double total = 0.0;
    int idx = 0;

    while (idx < length) {
        total = total + result[idx];
        idx++;
    }

    return total;
}
// *******************************************
/*
 * Fill the two input vectors with a linear ramp and clear the output.
 *
 * For each index i in [0, length):
 *   a[i]      = 1.0 + i/length
 *   b[i]      = 0.0 + i/length
 *   result[i] = 0.0
 * The ratio is computed in double precision and narrowed to float on store.
 */
void Initialize (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        result[idx] = 0.0;
        b[idx] = 0.0 + (double)idx/(double)length;
        a[idx] = 1.0 + (double)idx/(double)length;
        ++idx;
    }
}
// *******************************************
/*
 * Record the current wall-clock time in the file-level `start` timestamp.
 * Pair with Stop() and read the interval with Walltime().
 */
void Start()
{
    struct timeval *begin = &start;

    gettimeofday (begin, NULL);
}
// *******************************************
/*
 * Record the current wall-clock time in the file-level `stop` timestamp.
 * Call after the code being timed; Walltime() then reports the interval.
 */
void Stop()
{
    struct timeval *finish = &stop;

    gettimeofday (finish, NULL);
}
// *******************************************
/*
 * Interval between the file-level `start` and `stop` timestamps,
 * in milliseconds.
 *
 * The seconds difference is scaled up by 1000 and the microseconds
 * difference is scaled down by 1000; the two parts are then added.
 */
double Walltime ()
{
    double from_seconds = (stop.tv_sec - start.tv_sec)*1000.0;
    double from_micros = (stop.tv_usec - start.tv_usec)/1000.0;

    return from_seconds + from_micros;
}
// *******************************************
/*
 * Benchmark driver: vectmul <vector length> <repetitions>
 *
 * Allocates three float vectors of the requested length, times their
 * initialisation, then times <repetitions> runs of CPU_VectMul and prints
 * the sum of the result vector as a checksum.
 *
 * Exits with status 1 when arguments are missing, the length is outside
 * [100, 100000000], the repetition count is not a non-negative number,
 * or an allocation fails. Returns 0 on success.
 */
int main (int argc, char **argv)
{
    int length;
    long parsed_length, r, reps;
    size_t bytes;
    char *end;
    float *Vector1, *Vector2, *ResultVector;

    if (argc < 3) {
        printf ("vectmul <vector length> <repetitions>\n");
        exit(1);
    }

    /* strtol has defined behaviour on overflow (it saturates), unlike atoi;
       non-numeric input yields 0 and is caught by the range check. */
    parsed_length = strtol(argv[1], &end, 10);
    if ((parsed_length < 100) || (parsed_length > 100000000)) {
        printf ("vector length: out of range\n");
        exit(1);
    }
    length = (int)parsed_length;

    /* Reject input with no digits and negative counts instead of silently
       running zero repetitions. */
    reps = strtol(argv[2], &end, 10);
    if ((end == argv[2]) || (reps < 0)) {
        printf ("repetitions: invalid value\n");
        exit(1);
    }

    /* Casts on malloc are kept so the file also builds with a C++ compiler. */
    bytes = (size_t)length * sizeof(float);
    Vector1 = (float*)malloc (bytes);
    Vector2 = (float*)malloc (bytes);
    ResultVector = (float*)malloc (bytes);
    if (!(Vector1 && Vector2 && ResultVector)) {
        printf ("malloc error\n");
        exit(1);
    }

    /* The value is a size_t, so it must be printed with %zu, not %ld. */
    printf ("memory usage: %zu bytes\n", bytes * 3);

    Start();
    Initialize(Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("init time: %5.5f ms\n", Walltime());

    Start();
    for (r = 0; r < reps; r++)
        CPU_VectMul (Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("CPU execution time: %5.5f ms\n", Walltime());
    printf ("CPU result: %16.16e\n", CheckResults(ResultVector, length));

    free(ResultVector);
    free(Vector2);
    free(Vector1);
    return (0);
}