/*
 * description: One simulation step for a particle set on the GPU. Every
 *              particle's velocity is updated from the neighbours that lie
 *              inside a cut-off radius, then its position is advanced by the
 *              new velocity.
 * notes:       Host entry points are calcPartic() and its extern "C" wrapper
 *              Partic(). The particle array is copied to the device, processed
 *              and copied back on every call.
 */

// NOTE(review): the names of the five system headers were not legible in the
// reviewed copy of this file. The set below is what the code in this file
// needs; confirm against the original include list.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cuda_runtime.h>
#include "partic.h"

#define BLOCK_SIZE 16

// Timing output is disabled; replace with "#define TIMING" to enable it.
#undef TIMING

//---------------------------------------------------------------------------------------
// Flat index of the calling thread across the whole launch. It is valid for
// any grid/block shape, so the kernels do not depend on the exact launch
// configuration chosen by the host.
static __device__ int particIndex( void )
{
    const int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
    const int blockId  = blockIdx.x + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);
    const int threadId = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    return blockId * threadsPerBlock + threadId;
}

//---------------------------------------------------------------------------------------
// Velocity phase: one thread per particle.
//
//   En : device array of np particles; pos is read for all particles, velo is
//        read and written only for the thread's own particle
//   np : number of particles
//   Ka : interaction constant; the cut-off radius is 10*Ka and the width of
//        the exponential fall-off is 16*Ka*Ka
//
// Positions are NOT modified here. They are advanced afterwards by
// cu_movePartic(), so that every thread sees the same, unmodified positions
// while the velocities are being accumulated.
__global__ void cu_calcEnergie( Energie *En, int np, float Ka )
{
    const int n = particIndex();
    if( n >= np ) return;                       // surplus threads of the last block

    const float rM  = Ka*10;                    // cut-off radius
    const float sig = Ka*Ka*16;                 // previously 2*16*Ka*Ka; damping

    Energie     *E = En + n;
    const Vector M = E->pos;                    // own position
    Vector    velo = E->velo;                   // accumulated locally, stored once at the end
    Vector    diff;
    float     r, d;
    int       n_gotEn = 0;                      // number of neighbours inside the cut-off

    // NOTE(review): the scan starts at particle 1, so particle 0 never acts on
    // any other particle. Kept as found; confirm whether 0 was intended.
    for( int j = 1; j < np; ++j ) {
        if( j == n ) continue;                  // not itself

        const Vector *Q = &En[j].pos;
        diff.x = Q->x - M.x;                    // difference vector
        diff.y = Q->y - M.y;
        diff.z = Q->z - M.z;

        r = sqrtf( diff.x*diff.x + diff.y*diff.y + diff.z*diff.z );
        if( r > rM ) continue;                  // distance beyond the cut-off

        n_gotEn += 1;
        d = -Ka * expf( -r*r/sig );
        velo.x += diff.x * d;
        velo.y += diff.y * d;
        velo.z += diff.z * d;
    }

    // No neighbour in range: damp the particle's own velocity (was 0.25).
    if( !n_gotEn ) {
        velo.x *= 0.10f;
        velo.y *= 0.10f;
        velo.z *= 0.10f;
    }

    E->velo = velo;
}

//---------------------------------------------------------------------------------------
// Position phase: one thread per particle, pos += velo. Must be launched after
// cu_calcEnergie() on the same stream.
static __global__ void cu_movePartic( Energie *En, int np )
{
    const int n = particIndex();
    if( n >= np ) return;

    Energie *E = En + n;
    E->pos.x += E->velo.x;
    E->pos.y += E->velo.y;
    E->pos.z += E->velo.z;
}

//---------------------------------------------------------------------------------------
// Not referenced by the code in this file; the functions below take parameters
// of the same names, which shadow these symbols.
__device__ __constant__ float Ka;
__device__ __constant__ int Ro;

//---------------------------------------------------------------------------------------
// Prints msg, followed by the CUDA error text on stderr, and terminates the
// process when result is not cudaSuccess.
static void checkCuda( cudaError_t result, const char *msg )
{
    if( result == cudaSuccess ) return;

    printf( "%s", msg );
    fprintf( stderr, "CUDA error: %s\n", cudaGetErrorString(result) );
    exit(1);
}

#ifdef TIMING
// Returns the milliseconds elapsed since `start` was recorded and starts the
// next lap. Waits for all preceding work on the default stream.
static float lapTimer( cudaEvent_t start, cudaEvent_t stop )
{
    float ms = 0.0f;

    checkCuda( cudaEventRecord(stop, 0),                "cudaEventRecord failed \n" );
    checkCuda( cudaEventSynchronize(stop),              "cudaEventSynchronize failed \n" );
    checkCuda( cudaEventElapsedTime(&ms, start, stop),  "cudaEventElapsedTime failed \n" );
    checkCuda( cudaEventRecord(start, 0),               "cudaEventRecord failed \n" );
    return ms;
}
#endif

//---------------------------------------------------------------------------------------
// Runs one simulation step on the GPU.
//
//   En : host array of np particles, updated in place
//   np : number of particles; nothing is done for np <= 0
//   Ka : interaction constant handed to the kernel
//   Ro : unused
//
// Any CUDA failure prints a message and terminates the process with exit(1).
void calcPartic( Energie *En, int np, float Ka, int Ro )
{
    Energie *En_d = NULL;

    (void)Ro;

    fprintf(stdout,"\tCuda2: np %d\n",np); fflush(stdout);

    if( np <= 0 ) return;                       // nothing to do; a zero-block launch would be invalid

    const size_t nbytes = sizeof(Energie) * (size_t)np;

#ifdef TIMING
    cudaEvent_t evStart, evStop;
    checkCuda( cudaEventCreate(&evStart),   "cudaEventCreate failed \n" );
    checkCuda( cudaEventCreate(&evStop),    "cudaEventCreate failed \n" );
    checkCuda( cudaEventRecord(evStart, 0), "cudaEventRecord failed \n" );
#endif

    checkCuda( cudaMalloc( (void**)&En_d, nbytes ),
               "cudaMalloc failed - En_d \n" );
    checkCuda( cudaMemcpy( En_d, En, nbytes, cudaMemcpyHostToDevice ),
               "cudaMemcpy - Host-> GPU failed - En_d \n" );

#ifdef TIMING
    printf(" MemCopy -> Dev. time: %8.3f (ms)\n", lapTimer(evStart, evStop));
#endif

    // Execution configuration: BLOCK_SIZE x BLOCK_SIZE threads per block and a
    // one-dimensional grid with just enough blocks for np threads.
    const int threadsPerBlock = BLOCK_SIZE * BLOCK_SIZE;
    dim3 dimblock( BLOCK_SIZE, BLOCK_SIZE, 1 );
    dim3 dimgrid ( (unsigned int)((np - 1) / threadsPerBlock + 1) );

    // Both launches use the default stream, so the position phase starts only
    // after every velocity has been written.
    cu_calcEnergie<<< dimgrid, dimblock >>>( En_d, np, Ka );
    checkCuda( cudaGetLastError(), "cu_calcEnergie launch failed \n" );

    cu_movePartic<<< dimgrid, dimblock >>>( En_d, np );
    checkCuda( cudaGetLastError(), "cu_movePartic launch failed \n" );

#ifdef TIMING
    printf(" Cuda Processing time: %8.3f (ms)\n", lapTimer(evStart, evStop));
#endif

    // Blocking copy on the default stream: waits for the kernels and reports
    // any fault that occurred while they were running.
    checkCuda( cudaMemcpy( En, En_d, nbytes, cudaMemcpyDeviceToHost ),
               " \n *** cudaMemcpy GPU -> Host failed !\n" );

#ifdef TIMING
    printf(" MemCopy Back time: %8.3f (ms)\n\n", lapTimer(evStart, evStop));
    cudaEventDestroy(evStart);
    cudaEventDestroy(evStop);
#endif

    if( cudaFree(En_d) != cudaSuccess )
        fprintf( stderr, "cudaFree failed - En_d \n" );
}

//#ifndef RAYN
// C-linkage entry point; forwards all arguments to calcPartic().
extern "C"
void Partic( Energie *En, int np, float Ka, int Ro )
{
    calcPartic( En, np, Ka, Ro );
}
//#endif
//-------------------------------------------------------------------