Hello,
I hope that you can help me.
I have a function “Host_Mix (int * Q, float * f, float * J)” written without Cuda. after that I’ve been using Cuda … But I do not get similar results … In addition, each time get new result. I think I got errors in the Synchronization.
I thank you in advance.
Amer
//**********************
#include “cuda_runtime.h”
#include “device_launch_parameters.h”
#include <stdio.h>
#include
const int NQQ=5;
const int NK=9;
// *****************************************************
global void Mix_kern(int *Q, float *f, float J)
{
int i=blockIdx.x;
int q=i blockDim.x;
float Jq= f[ Q[q+0]-1]*f[ Q[q+2]-1]-f[ Q[q+1]-1] *f[ Q[q+3]-1];
J[ Q[q+0]-1]-=Jq;
J[ Q[q+1]-1]+=Jq;
J[ Q[q+2]-1]-=Jq;
J[ Q[q+3]-1]+=Jq;
}
void Cuda_Mix(int Q, float f,float J)
{
size_t sQ =4NQQsizeof(int);
size_t sf=NKsizeof(float);
//size_t sjf=NQQ*sizeof(float);
std::fill(J, J+NK, 0);
int *dQ;
cudaMalloc((void**)&dQ, sQ); cudaMemcpy(dQ,Q,sQ,cudaMemcpyHostToDevice);
float *df, *dJ;
cudaMalloc((void**)&df, sf); cudaMemcpy(df,f,sf,cudaMemcpyHostToDevice);
cudaMalloc((void**)&dJ, sf); cudaMemcpy(dJ,J,sf,cudaMemcpyHostToDevice);
Mix_kern<<<NQQ,4>>>(dQ,df,dJ);
cudaMemcpy(J,dJ,sf,cudaMemcpyDeviceToHost);
cudaFree(dJ);
cudaFree(df);
cudaFree(dQ);
// getchar();
/* */
}
// **
void Host_Mix(int Q,float f,float J);
//******************************
void main()
{
int Q[5][4]={ {1,2,5,4} , {2,3,6,5}, {4,5,8,7}, {5,6,9,8},{2,6,8,4} };
float f[9]={1,2,3,4,5,6,7,8,9};
float Jd[9],Jh[9];
Cuda_Mix(&Q[0][0],f,Jd);
Host_Mix(&Q[0][0],f,Jh);
for(int i=0;i<NK;i++)
printf(“%5.2f \t %5.2f \n”,Jd[i],Jh[i]);
getchar();
}
// ***** Host ***** Host ***** Host ***** Host ***** Host ***** Host ***** Host ***** Host
void Host_Mix(int *Q,float *f,float *J)
{
std::fill(J, J+NK, 0);
for(int q=0;q<NQQ;q++)
{
float Jq= f[ Q[q+0]-1]*f[ Q[q+2]-1]-f[ Q[q+1]-1] *f[ Q[q+3]-1];
J[ Q[q+0]-1]-=Jq;
J[ Q[q+1]-1]+=Jq;
J[ Q[q+2]-1]-=Jq;
J[ Q[q+3]-1]+=Jq;
}
}
//*********************