Vector Multiplication

Hello.

I have a program that multiplies two vectors on the CPU. I need to implement this program as a CUDA kernel. How can I do that?

Sorry, I’m new to CUDA.

I use a 64-bit computer.

GPU: NVIDIA GeForce GT 120

Thanks

#include <stdio.h>

#include <stdlib.h>

#include <sys/time.h>

struct timeval  start,stop;

// *******************************************

// Element-wise product of two float vectors on the CPU:
// result[i] = a[i] * b[i] for every i in [0, length).
// Nothing is written when length is zero or negative.
void CPU_VectMul (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        result[idx] = a[idx] * b[idx];
        ++idx;
    }
}

// *******************************************

// Returns the sum of the first `length` elements of `result`, accumulated in
// double precision in index order. Returns 0.0 when length is not positive.
double CheckResults ( float *result, int length)
{
    double total = 0.0;
    const float *cursor = result;

    for (int remaining = length; remaining > 0; --remaining) {
        total += *cursor;
        ++cursor;
    }
    return total;
}

// *******************************************

// Fills the three vectors with their starting values:
//   a[i]      = 1.0 + i/length
//   b[i]      = 0.0 + i/length
//   result[i] = 0.0
// The fraction is computed in double precision and narrowed to float on store.
void Initialize  (float *a, float *b, float *result, int length)
{
    const double count = (double)length;

    for (int idx = 0; idx < length; ++idx) {
        const double fraction = (double)idx / count;

        a[idx] = 1.0 + fraction;
        b[idx] = 0.0 + fraction;
        result[idx] = 0.0;
    }
}

// *******************************************

// Stores the current wall-clock time in the file-scope `start` timestamp.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list is not a prototype before C23.
void Start(void)
{
    gettimeofday(&start, NULL);
}

// *******************************************

// Stores the current wall-clock time in the file-scope `stop` timestamp.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list is not a prototype before C23.
void Stop(void)
{
    gettimeofday(&stop, NULL);
}

// *******************************************

// Returns the time between the file-scope `start` and `stop` timestamps in
// milliseconds: whole seconds scaled by 1000 plus microseconds divided by 1000.
// Declared with (void) so the compiler rejects calls that pass arguments.
double Walltime(void)
{
    double elapsed = (stop.tv_sec - start.tv_sec) * 1000.0;

    // The microsecond difference may be negative; added to the whole-second
    // term it still yields the correct total.
    elapsed += (stop.tv_usec - start.tv_usec) / 1000.0;
    return elapsed;
}

// *******************************************

// Benchmark driver.
// Usage: vectmul <vector length> <repetitions>
// Allocates three float vectors of the requested length, times their
// initialization, times `reps` runs of CPU_VectMul, and prints the sum of the
// result vector as a checksum. Exits with status 1 on bad arguments or
// allocation failure, 0 otherwise.
int main (int argc, char **argv)
{
    int length, r, reps;
    float *Vector1, *Vector2, *ResultVector;
    size_t bytes;

    if (argc < 3) {
        printf ("vectmul <vector length> <repetitions>\n");
        exit(1);
    }

    // atoi yields 0 for non-numeric input, which the range check rejects.
    length = atoi(argv[1]);
    if ((length < 100) || (length > 100000000)) {
        printf ("vector length: out of range\n");
        exit(1);
    }

    // A non-positive repetition count simply runs the multiply loop zero times.
    reps = atoi(argv[2]);

    // length is known to be positive here, so the conversion to size_t is safe.
    bytes = (size_t)length * sizeof(float);

    Vector1 = (float*)malloc (bytes);
    Vector2 = (float*)malloc (bytes);
    ResultVector = (float*)malloc (bytes);
    if (!(Vector1 && Vector2 && ResultVector)) {
        printf ("malloc error\n");
        exit(1);
    }

    // The byte count is a size_t, so it must be printed with %zu (not %ld).
    printf ("memory usage: %zu bytes\n", bytes * 3);

    Start();
    Initialize(Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("init time: %5.5f ms\n", Walltime());

    Start();
    for (r = 0; r < reps; r++) {
        CPU_VectMul (Vector1, Vector2, ResultVector, length);
    }
    Stop();
    printf ("CPU execution time: %5.5f ms\n", Walltime());

    printf ("CPU result: %16.16e\n", CheckResults(ResultVector, length));

    // Release the vectors instead of relying on process teardown.
    free(Vector1);
    free(Vector2);
    free(ResultVector);

    return (0);
}

Hello.

I have a program that multiplies two vectors on the CPU. I need to implement this program as a CUDA kernel. How can I do that?

Sorry, I’m new to CUDA.

I use a 64-bit computer.

GPU: NVIDIA GeForce GT 120

Thanks

#include <stdio.h>

#include <stdlib.h>

#include <sys/time.h>

struct timeval  start,stop;

// *******************************************

// Element-wise product of two float vectors on the CPU:
// result[i] = a[i] * b[i] for every i in [0, length).
// Nothing is written when length is zero or negative.
void CPU_VectMul (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        result[idx] = a[idx] * b[idx];
        ++idx;
    }
}

// *******************************************

// Returns the sum of the first `length` elements of `result`, accumulated in
// double precision in index order. Returns 0.0 when length is not positive.
double CheckResults ( float *result, int length)
{
    double total = 0.0;
    const float *cursor = result;

    for (int remaining = length; remaining > 0; --remaining) {
        total += *cursor;
        ++cursor;
    }
    return total;
}

// *******************************************

// Fills the three vectors with their starting values:
//   a[i]      = 1.0 + i/length
//   b[i]      = 0.0 + i/length
//   result[i] = 0.0
// The fraction is computed in double precision and narrowed to float on store.
void Initialize  (float *a, float *b, float *result, int length)
{
    const double count = (double)length;

    for (int idx = 0; idx < length; ++idx) {
        const double fraction = (double)idx / count;

        a[idx] = 1.0 + fraction;
        b[idx] = 0.0 + fraction;
        result[idx] = 0.0;
    }
}

// *******************************************

// Stores the current wall-clock time in the file-scope `start` timestamp.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list is not a prototype before C23.
void Start(void)
{
    gettimeofday(&start, NULL);
}

// *******************************************

// Stores the current wall-clock time in the file-scope `stop` timestamp.
// Declared with (void) so the compiler rejects calls that pass arguments;
// an empty parameter list is not a prototype before C23.
void Stop(void)
{
    gettimeofday(&stop, NULL);
}

// *******************************************

// Returns the time between the file-scope `start` and `stop` timestamps in
// milliseconds: whole seconds scaled by 1000 plus microseconds divided by 1000.
// Declared with (void) so the compiler rejects calls that pass arguments.
double Walltime(void)
{
    double elapsed = (stop.tv_sec - start.tv_sec) * 1000.0;

    // The microsecond difference may be negative; added to the whole-second
    // term it still yields the correct total.
    elapsed += (stop.tv_usec - start.tv_usec) / 1000.0;
    return elapsed;
}

// *******************************************

// Benchmark driver.
// Usage: vectmul <vector length> <repetitions>
// Allocates three float vectors of the requested length, times their
// initialization, times `reps` runs of CPU_VectMul, and prints the sum of the
// result vector as a checksum. Exits with status 1 on bad arguments or
// allocation failure, 0 otherwise.
int main (int argc, char **argv)
{
    int length, r, reps;
    float *Vector1, *Vector2, *ResultVector;
    size_t bytes;

    if (argc < 3) {
        printf ("vectmul <vector length> <repetitions>\n");
        exit(1);
    }

    // atoi yields 0 for non-numeric input, which the range check rejects.
    length = atoi(argv[1]);
    if ((length < 100) || (length > 100000000)) {
        printf ("vector length: out of range\n");
        exit(1);
    }

    // A non-positive repetition count simply runs the multiply loop zero times.
    reps = atoi(argv[2]);

    // length is known to be positive here, so the conversion to size_t is safe.
    bytes = (size_t)length * sizeof(float);

    Vector1 = (float*)malloc (bytes);
    Vector2 = (float*)malloc (bytes);
    ResultVector = (float*)malloc (bytes);
    if (!(Vector1 && Vector2 && ResultVector)) {
        printf ("malloc error\n");
        exit(1);
    }

    // The byte count is a size_t, so it must be printed with %zu (not %ld).
    printf ("memory usage: %zu bytes\n", bytes * 3);

    Start();
    Initialize(Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("init time: %5.5f ms\n", Walltime());

    Start();
    for (r = 0; r < reps; r++) {
        CPU_VectMul (Vector1, Vector2, ResultVector, length);
    }
    Stop();
    printf ("CPU execution time: %5.5f ms\n", Walltime());

    printf ("CPU result: %16.16e\n", CheckResults(ResultVector, length));

    // Release the vectors instead of relying on process teardown.
    free(Vector1);
    free(Vector2);
    free(ResultVector);

    return (0);
}