OK, here it is
#include <iostream>
#include <fstream>
#include <cuda.h>
#include <curand.h>
#include <cuda_runtime_api.h>
#include <curand_kernel.h>
using namespace std;
#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \
printf("Error at %s:%d\n",__FILE__,__LINE__);\
return EXIT_FAILURE;}} while(0)
#define CURAND_CALL(x) do { if((x) != CURAND_STATUS_SUCCESS) { \
printf("Error at %s:%d\n",__FILE__,__LINE__);\
return EXIT_FAILURE;}} while(0)
__global__ void forca (float2 *pos, float2 *acc, float box, int N, float sigma2, float eps){
float2 del;
float r2;
int i=blockIdx.x*blockDim.x+threadIdx.x;
for (int j=0; j < N; j++) {
if ((i != j) && (i < N) && (j<N)){
del.x=pos[i].x-pos[j].x;
del.y=pos[i].y-pos[j].y;
if (del.x > box/2) {
del.x -= box;
}
else if (del.x < -box/2) {
del.x += box;
}
if (del.y > box/2) {
del.y -= box;
}
else if (del.y < -box/2) {
del.y += box;
}
r2=del.x*del.x+del.y*del.y;
acc[i].x+=24*eps*(2*pow((sigma2/r2),6)-pow((sigma2/r2),3))*del.x/r2 ;
acc[i].y+=24*eps*(2*pow((sigma2/r2),6)-pow((sigma2/r2),3))*del.y/r2;
}
}
}
__global__ void forcapinnin(float2 *pos, float2 *pospinn, float2 *acc, float forcamax, float raiopinn, int N, int npinnings, float box){
int i=blockIdx.x*blockDim.x+threadIdx.x;
float2 del;
float r2,r;
if (i < N) {
for (int j=0; j<npinnings; j++){
del.x=pos[i].x-pospinn[j].x;
del.y=pos[i].y-pospinn[j].y;
if (del.x > box/2) {
del.x -= box;
}
else if (del.x < -box/2) {
del.x += box;
}
if (del.y > box/2) {
del.y -= box;
}
else if (del.y < -box/2) {
del.y += box;
}
r2=del.x*del.x+del.y*del.y;
r=sqrt(r2);
if (( r < raiopinn) && ( r !=0)){
acc[i].x+=del.x*forcamax*(r-raiopinn)/(r*raiopinn);
acc[i].y+=del.y*forcamax*(r-raiopinn)/(r*raiopinn);
}
}
}
}
__global__ void mover (float2 *pos, float2 *acc, float box, float dt, int N,float *randomgauss,float *randomuni) {
int i=threadIdx.x+blockDim.x*blockIdx.x;
float dr=randomgauss[i],dtet=randomuni[i]*2*3.1419;
if (i <N) {
pos[i].x+=cos(dtet)*dr+acc[i].x*dt;
pos[i].y+=sin(dtet)*dr+acc[i].y*dt;
acc[i].x=0;
acc[i].y=0;
if (pos[i].x > box) {
pos[i].x -= box;
}
else if (pos[i].x < 0) {
pos[i].x += box;
}
if (pos[i].y > box) {
pos[i].y -= box;
}
else if (pos[i].y < 0) {
pos[i].y += box;
}
}
}
void checkCUDAError(const char *msg)
{
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg,
cudaGetErrorString( err) );
system("pause");
exit(EXIT_FAILURE);
}
}
int main(void){
float2 *pos, *acc,*pospinn;
float2 *pos_d, *acc_d,*pospinn_d;
int N,i,counter=0;
float box,dt, t=0.0,eps,sigma2,forcamax,raiopinn,sigma;
size_t nrand,npinnings;
float *randomgauss,*randomuni;
curandGenerator_t gen;
srand(45645645);
std::cout << "Particles: ";
std::cin >> N;
std::cout << "Pinnings: ";
std::cin >> npinnings;
std::cout << "Box: ";
std::cin >> box;
std::cout << "sigma: ";
std::cin >> sigma;
std::cout << "Eps: ";
std::cin >> eps;
std::cout << "dt: ";
std::cin >> dt;
std::cout << "Pinning's force: ";
std::cin >> forcamax;
std::cout << "Pinning's radius: ";
std::cin >> raiopinn;
nrand=N;
sigma2=sigma*sigma;
size_t memSize=N*sizeof(float2);
size_t memSizeRandom=nrand*sizeof(float);
size_t memSizePinn=npinnings*sizeof(float2);
pos=(float2 *) malloc (memSize);
acc=(float2 *) malloc (memSize);
pospinn=(float2 *) malloc (memSizePinn);
cudaMalloc( (void **) &pos_d, memSize );
checkCUDAError("Malloc pos_d");
cudaMalloc( (void **) &acc_d, memSize );
checkCUDAError("Malloc acc_d");
cudaMalloc( (void **) &randomgauss, memSizeRandom);
checkCUDAError("Malloc randomgauss");
cudaMalloc( (void **) &randomuni, memSizeRandom);
checkCUDAError("Malloc randomgauss");
cudaMalloc( (void **) &pospinn_d, memSizePinn);
checkCUDAError("Malloc pospinn_d");
cout << "Ok para cudaMalloc" << endl;
CURAND_CALL(curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen,rand()));
for (i=0; i<N; i++){
pos[i].x=(rand()/(float)RAND_MAX)*box;
pos[i].y=(rand()/(float)RAND_MAX)*box;
pospinn[i].x=(rand()/(float)RAND_MAX)*box;
pospinn[i].y=(rand()/(float)RAND_MAX)*box;
acc[i].x=0;
acc[i].y=0;
}
cudaMemcpy( pos_d, pos, memSize, cudaMemcpyHostToDevice );
checkCUDAError("Memcpy pos");
cudaMemcpy( acc_d, acc, memSize, cudaMemcpyHostToDevice );
checkCUDAError("Memcpy acc");
cudaMemcpy( pospinn_d, pospinn, npinnings*sizeof(float), cudaMemcpyHostToDevice );
checkCUDAError("Memcpy pospinn");
cout << "Ok para cudaMemCpy" << endl;
ofstream trans,inicial,final,pinnings;
trans.open ("trans.dat");
inicial.open("inicial.dat");
pinnings.open("pinnings.dat");
for (i=0;i<N;i++) {
inicial << pos[i].x;
inicial << " ";
inicial << pos[i].y;
inicial << " " <<endl;
}
inicial.close();
for (i=0;i<N;i++) {
pinnings << pospinn[i].x;
pinnings << " ";
pinnings << pospinn[i].y;
pinnings << " " <<endl;
}
pinnings.close();
int nblocksforca=N/256 + (N%256 == 0? 0:1);
int nblocksmove=N/256 + (N%256 == 0? 0:1);
cout << "Ok para nblocks" << endl;
float temp=1000.0,dtemp=-1.,raizdatemp;
while (temp > 0.001) {
temp+=dtemp;
raizdatemp=sqrt(2*temp)*dt;
t=0.0;
cout << temp << endl;
while (t<= 1.0){
t+=dt;
counter+=1;
// cout << "Call randomuni" << endl;
curandGenerateUniform(gen, randomuni, nrand);
// cout << "Finda randomuni" << endl;
// cout << "Call randomgauss" << endl;
curandGenerateNormal(gen, randomgauss, nrand,0.0,raizdatemp);
// cout << "Finda randomgauss" << endl;
forca <<< nblocksforca, 256 >>> (pos_d, acc_d, box, N, sigma2, eps);
checkCUDAError("forca");
forcapinnin <<< nblocksforca, 256 >>> (pos_d, pospinn_d, acc_d, forcamax, raiopinn, N, npinnings, box);
checkCUDAError("forcapinn");
mover <<< nblocksmove, 256 >>> (pos_d, acc_d, box, dt, N, randomgauss,randomuni);
checkCUDAError("mover");
if (counter % 10000 == 0) {
cout << t << endl;
cudaMemcpy( pos, pos_d, memSize, cudaMemcpyDeviceToHost );
checkCUDAError("Memcpy pos2");
for (i=0;i<N;i++) {
trans << pos[i].x;
trans << " ";
trans << pos[i].y;
trans << " " <<endl;
}
trans << endl;
}
}
}
trans.close();
final.open("final.dat");
cudaMemcpy( pos, pos_d, memSize, cudaMemcpyDeviceToHost );
for (i=0;i<N;i++) {
final << pos[i].x;
final << " ";
final << pos[i].y;
final << " " <<endl;
}
final.close();
CURAND_CALL(curandDestroyGenerator(gen));
return 0;
}
sample.cu (6.98 KB)