NVIDIA Developer Forums

Vector Multiplication

Accelerated Computing CUDA CUDA Programming and Performance

nasil122002 November 21, 2010, 12:10pm 1

Hello.

I have a program that multiplies two vectors on the CPU. I need to implement this program as a CUDA kernel. How can I do that?

Sorry I’m new with CUDA.

I use a 64-bit computer.

GPU: NVIDIA GeForce GT 120

Thanks

#include <stdio.h>

#include <stdlib.h>

#include <sys/time.h>

struct timeval  start,stop;

// *******************************************

// Element-wise product of two float vectors.
//   a, b    input vectors, each read at indices 0 .. length-1
//   result  output vector; result[i] receives a[i] * b[i]
//   length  number of elements to process (nothing is written when <= 0)
void CPU_VectMul (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        result[idx] = a[idx] * b[idx];
        ++idx;
    }
}

// *******************************************

// Adds up the first `length` elements of `result` and returns the total.
// The running total is kept in a double and the elements are added in
// index order. Returns 0.0 when length <= 0.
double CheckResults ( float *result, int length)
{
    double total = 0.0;
    int idx = 0;

    while (idx < length) {
        total = total + result[idx];
        ++idx;
    }

    return total;
}

// *******************************************

// Fills the three vectors with their starting values.
// For each index i in 0 .. length-1:
//   a[i]      = 1.0 + i/length
//   b[i]      = 0.0 + i/length
//   result[i] = 0.0
// The quotient is computed in double and narrowed to float on store.
void Initialize  (float *a, float *b, float *result, int length)
{
    const double denom = (double)length;
    int idx;

    for (idx = 0; idx < length; ++idx) {
        const double frac = (double)idx / denom;

        a[idx] = (float)(1.0 + frac);
        b[idx] = (float)(0.0 + frac);
        result[idx] = 0.0f;
    }
}

// *******************************************

// Stores the current time of day in the file-scope `start` timestamp.
// The return value of gettimeofday() is not checked.
void Start()
{
    (void) gettimeofday (&start, NULL);
}

// *******************************************

// Stores the current time of day in the file-scope `stop` timestamp.
// The return value of gettimeofday() is not checked.
void Stop()
{
    (void) gettimeofday (&stop, NULL);
}

// *******************************************

// Returns the time between the file-scope `start` and `stop` timestamps
// in milliseconds: the seconds difference scaled by 1000 plus the
// microseconds difference divided by 1000.
double Walltime ()
{
    const double ms_from_seconds = (stop.tv_sec - start.tv_sec) * 1000.0;
    const double ms_from_micros = (stop.tv_usec - start.tv_usec) / 1000.0;

    return ms_from_seconds + ms_from_micros;
}

// *******************************************

// Benchmark driver. Usage: vectmul <vector length> <repetitions>
//
// Allocates three float vectors of the requested length, times their
// initialization, then times <repetitions> calls of CPU_VectMul and prints
// the sum of the result vector as a checksum.
// Exits with status 1 when arguments are missing, when the length is outside
// [100, 100000000], or when an allocation fails; returns 0 otherwise.
int main (int argc, char **argv)
{
    long    length_arg, reps, r;
    int     length;
    size_t  bytes;
    float   *Vector1, *Vector2, *ResultVector;

    if (argc < 3) {
        printf ("vectmul <vector length> <repetitions>\n");
        exit(1);
    }

    // strtol, unlike atoi, has defined behavior when the text does not fit
    // the result type, so the range is validated on the long before it is
    // narrowed to int.
    length_arg = strtol(argv[1], NULL, 10);
    if ((length_arg < 100) || (length_arg > 100000000)) {
        printf ("vector length: out of range\n");
        exit(1);
    }
    length = (int)length_arg;

    // A zero or negative repetition count simply skips the timed loop.
    reps = strtol(argv[2], NULL, 10);

    bytes = (size_t)length * sizeof(float);
    Vector1 = (float*)malloc (bytes);
    Vector2 = (float*)malloc (bytes);
    ResultVector = (float*)malloc (bytes);

    if (!(Vector1 && Vector2 && ResultVector)) {
        printf ("malloc error\n");
        // free(NULL) is a no-op, so whichever allocations succeeded are released.
        free(Vector1);
        free(Vector2);
        free(ResultVector);
        exit(1);
    }

    // The byte count is a size_t, which must be printed with %zu.
    printf ("memory usage: %zu bytes\n", bytes * 3);

    Start();
    Initialize(Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("init time: %5.5f ms\n", Walltime());

    Start();
    for (r = 0; r < reps; r++) {
        CPU_VectMul (Vector1, Vector2, ResultVector, length);
    }
    Stop();
    printf ("CPU execution time: %5.5f ms\n", Walltime());

    printf ("CPU result: %16.16e\n", CheckResults(ResultVector, length));

    free(Vector1);
    free(Vector2);
    free(ResultVector);

    return (0);
}

nasil122002 November 21, 2010, 12:10pm 2

Hello.

I have a program that multiplies two vectors on the CPU. I need to implement this program as a CUDA kernel. How can I do that?

Sorry I’m new with CUDA.

I use a 64-bit computer.

GPU: NVIDIA GeForce GT 120

Thanks

#include <stdio.h>

#include <stdlib.h>

#include <sys/time.h>

struct timeval  start,stop;

// *******************************************

// Element-wise product of two float vectors.
//   a, b    input vectors, each read at indices 0 .. length-1
//   result  output vector; result[i] receives a[i] * b[i]
//   length  number of elements to process (nothing is written when <= 0)
void CPU_VectMul (float *a, float *b, float *result, int length)
{
    int idx = 0;

    while (idx < length) {
        result[idx] = a[idx] * b[idx];
        ++idx;
    }
}

// *******************************************

// Adds up the first `length` elements of `result` and returns the total.
// The running total is kept in a double and the elements are added in
// index order. Returns 0.0 when length <= 0.
double CheckResults ( float *result, int length)
{
    double total = 0.0;
    int idx = 0;

    while (idx < length) {
        total = total + result[idx];
        ++idx;
    }

    return total;
}

// *******************************************

// Fills the three vectors with their starting values.
// For each index i in 0 .. length-1:
//   a[i]      = 1.0 + i/length
//   b[i]      = 0.0 + i/length
//   result[i] = 0.0
// The quotient is computed in double and narrowed to float on store.
void Initialize  (float *a, float *b, float *result, int length)
{
    const double denom = (double)length;
    int idx;

    for (idx = 0; idx < length; ++idx) {
        const double frac = (double)idx / denom;

        a[idx] = (float)(1.0 + frac);
        b[idx] = (float)(0.0 + frac);
        result[idx] = 0.0f;
    }
}

// *******************************************

// Stores the current time of day in the file-scope `start` timestamp.
// The return value of gettimeofday() is not checked.
void Start()
{
    (void) gettimeofday (&start, NULL);
}

// *******************************************

// Stores the current time of day in the file-scope `stop` timestamp.
// The return value of gettimeofday() is not checked.
void Stop()
{
    (void) gettimeofday (&stop, NULL);
}

// *******************************************

// Returns the time between the file-scope `start` and `stop` timestamps
// in milliseconds: the seconds difference scaled by 1000 plus the
// microseconds difference divided by 1000.
double Walltime ()
{
    const double ms_from_seconds = (stop.tv_sec - start.tv_sec) * 1000.0;
    const double ms_from_micros = (stop.tv_usec - start.tv_usec) / 1000.0;

    return ms_from_seconds + ms_from_micros;
}

// *******************************************

// Benchmark driver. Usage: vectmul <vector length> <repetitions>
//
// Allocates three float vectors of the requested length, times their
// initialization, then times <repetitions> calls of CPU_VectMul and prints
// the sum of the result vector as a checksum.
// Exits with status 1 when arguments are missing, when the length is outside
// [100, 100000000], or when an allocation fails; returns 0 otherwise.
int main (int argc, char **argv)
{
    long    length_arg, reps, r;
    int     length;
    size_t  bytes;
    float   *Vector1, *Vector2, *ResultVector;

    if (argc < 3) {
        printf ("vectmul <vector length> <repetitions>\n");
        exit(1);
    }

    // strtol, unlike atoi, has defined behavior when the text does not fit
    // the result type, so the range is validated on the long before it is
    // narrowed to int.
    length_arg = strtol(argv[1], NULL, 10);
    if ((length_arg < 100) || (length_arg > 100000000)) {
        printf ("vector length: out of range\n");
        exit(1);
    }
    length = (int)length_arg;

    // A zero or negative repetition count simply skips the timed loop.
    reps = strtol(argv[2], NULL, 10);

    bytes = (size_t)length * sizeof(float);
    Vector1 = (float*)malloc (bytes);
    Vector2 = (float*)malloc (bytes);
    ResultVector = (float*)malloc (bytes);

    if (!(Vector1 && Vector2 && ResultVector)) {
        printf ("malloc error\n");
        // free(NULL) is a no-op, so whichever allocations succeeded are released.
        free(Vector1);
        free(Vector2);
        free(ResultVector);
        exit(1);
    }

    // The byte count is a size_t, which must be printed with %zu.
    printf ("memory usage: %zu bytes\n", bytes * 3);

    Start();
    Initialize(Vector1, Vector2, ResultVector, length);
    Stop();
    printf ("init time: %5.5f ms\n", Walltime());

    Start();
    for (r = 0; r < reps; r++) {
        CPU_VectMul (Vector1, Vector2, ResultVector, length);
    }
    Stop();
    printf ("CPU execution time: %5.5f ms\n", Walltime());

    printf ("CPU result: %16.16e\n", CheckResults(ResultVector, length));

    free(Vector1);
    free(Vector2);
    free(ResultVector);

    return (0);
}

Topic		Replies	Views	Activity
Vector Vector Multiplication CUDA Programming and Performance	1	790	April 6, 2011
Is to possible to speed up multiple matrix per vector multiplication using CUDA? CUDA Programming and Performance	2	1461	April 12, 2010
Matrix Multiplication CUDA Programming and Performance	1	3299	March 28, 2009
Matrix by vector multiplication CUDA Programming and Performance	4	982	June 16, 2013
Vector Vector Multiplication Code : Error Basic Vector Vector Multiplication code CUDA Programming and Performance	5	7456	May 21, 2008
vector matrix multiplication, share my code:) CUDA Programming and Performance	1	5883	October 11, 2011
How do I multiply the corresponding elements of vector A and vector B? CUDA Programming and Performance cuda	5	396	August 27, 2023
Vector-Matrix Multiplication is this a fast kernel? CUDA Programming and Performance	6	1880	April 15, 2010
2D Matrix Multiply with cuda....I made it all but time duration is something strange. CUDA Programming and Performance	0	399	December 6, 2018
Pls help - Matrix multiplication CUDA Programming and Performance	0	736	February 9, 2011