#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define XYLENGTH 480
#define MATRIXSIZE (XYLENGTH * XYLENGTH)
#define BLOCK_LENGTH 16
#define AR_INDEX(ar, a, b) ((ar)[(a) * XYLENGTH + (b)])

// declaration of the matrices used in the host memory
float* h_ga;
float* h_dz;
float* h_ez;
float* h_hx;
float* h_hy;
float* h_ihx;
float* h_ihy;
float* h_gi2;
float* h_gj2;
float* h_gi3;
float* h_gj3;
float* h_fi1;
float* h_fj1;
float* h_fi2;
float* h_fj2;
float* h_fi3;
float* h_fj3;

// declaration of the matrices used in the kernels.
// These are host-side pointers to device memory, set by cudaMalloc and
// passed to the kernels as arguments. (The original declared them
// __device__, but a __device__ variable cannot be written by cudaMalloc
// from host code, so the kernels would have read uninitialized pointers.)
float* d_ga;
float* d_dz;
float* d_ez;
float* d_hx;
float* d_hy;
float* d_ihx;
float* d_ihy;
float* d_gi2;
float* d_gj2;
float* d_gi3;
float* d_gj3;
float* d_fi1;
float* d_fj1;
float* d_fi2;
float* d_fj2;
float* d_fi3;
float* d_fj3;

// Kernel that executes on the CUDA device
__global__ void UpdateDZfield(int time, float* d_dz, float* d_hx, float* d_hy,
                              float* d_gi2, float* d_gj2,
                              float* d_gi3, float* d_gj3)
{
    // the coordinates of the sinusoidal source
    int ic = XYLENGTH / 2 - 5;
    int jc = XYLENGTH / 2 - 5;
    float ddx = 0.01f;       // cell size
    float dt = ddx / 6e8f;   // time step
    float pi = 3.14159f;

    // block index
    int bx = blockIdx.x;
    int by = blockIdx.y;
    // thread index
    int tx = threadIdx.x;
    int ty = threadIdx.y;
    // coordinates in the global matrix
    int x = bx * BLOCK_LENGTH + tx;
    int y = by * BLOCK_LENGTH + ty;

    if (x == 0 || y == 0) {
        // boundary condition: do nothing
    } else {
        // update the dz field
        AR_INDEX(d_dz, x, y) = d_gi3[x] * d_gj3[y] * AR_INDEX(d_dz, x, y)
            + d_gi2[x] * d_gj2[y] * 0.5f
              * ( AR_INDEX(d_hy, x, y) - AR_INDEX(d_hy, x - 1, y)
                - AR_INDEX(d_hx, x, y) + AR_INDEX(d_hx, x, y - 1) );
    }

    // sinusoidal hard source (1500 MHz) at the cell (ic, jc)
    if (x == ic && y == jc) {
        float pulse = sinf(2.0f * pi * 1500e6f * dt * time);
        AR_INDEX(d_dz, x, y) = pulse;
    }
} // end of UpdateDZfield(int time)

__global__ void UpdateEZfield(float* d_ez, float* d_ga, float* d_dz)
{
    // block index
    int bx = blockIdx.x;
    int by = blockIdx.y;
    // thread index
    int tx = threadIdx.x;
    int ty = threadIdx.y;
    // coordinates in the global matrix
    int x = bx * BLOCK_LENGTH + tx;
    int y = by * BLOCK_LENGTH + ty;

    // calculation of the Ez field
    AR_INDEX(d_ez, x, y) = AR_INDEX(d_ga, x, y) * AR_INDEX(d_dz, x, y);

    // set the Ez edges to 0, as part of the PML
    if (x == 0 || x == XYLENGTH - 1 || y == 0 || y == XYLENGTH - 1)
        AR_INDEX(d_ez, x, y) = 0.0f;
} // end of UpdateEZfield()

__global__ void UpdateHfield(float* d_ez, float* d_hx, float* d_hy,
                             float* d_ihx, float* d_ihy,
                             float* d_fi1, float* d_fi2, float* d_fi3,
                             float* d_fj1, float* d_fj2, float* d_fj3)
{
    float curl_e;

    // block index
    int bx = blockIdx.x;
    int by = blockIdx.y;
    // thread index
    int tx = threadIdx.x;
    int ty = threadIdx.y;
    // coordinates in the global matrix
    int x = bx * BLOCK_LENGTH + tx;
    int y = by * BLOCK_LENGTH + ty;

    // calculation of the Hx field
    if (y == XYLENGTH - 1) {
        // boundary condition: do nothing
    } else {
        curl_e = AR_INDEX(d_ez, x, y) - AR_INDEX(d_ez, x, y + 1);
        AR_INDEX(d_ihx, x, y) = AR_INDEX(d_ihx, x, y) + d_fi1[x] * curl_e;
        AR_INDEX(d_hx, x, y) = d_fj3[y] * AR_INDEX(d_hx, x, y)
            + d_fj2[y] * 0.5f * (curl_e + AR_INDEX(d_ihx, x, y));
    }

    // calculation of the Hy field
    if (x == XYLENGTH - 1) {
        // boundary condition: do nothing
    } else {
        curl_e = AR_INDEX(d_ez, x + 1, y) - AR_INDEX(d_ez, x, y);
        AR_INDEX(d_ihy, x, y) = AR_INDEX(d_ihy, x, y) + d_fj1[y] * curl_e;
        AR_INDEX(d_hy, x, y) = d_fi3[x] * AR_INDEX(d_hy, x, y)
            + d_fi2[x] * 0.5f * (curl_e + AR_INDEX(d_ihy, x, y));
    }
} // end of UpdateHfield()
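// Added helper (not in the original listing): a minimal CUDA error-checking
// wrapper. The original code ignores the status returned by every CUDA call;
// routing calls through this function, e.g.
//     checkCuda(cudaMalloc((void**) &d_ga, mem_size_matrix));
// makes allocation, copy, or launch failures visible instead of silent.
// Its use is optional; the code below is left as the author wrote it.
static void checkCuda(cudaError_t status)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}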
// main driver that executes on the host
int main(void)
{
    // variables used in the FDTD simulation
    int n, i, j, nsteps, npml, T;
    float xn, xxn, xnum, xd;
    float t0, spread;   // unused here: leftovers from a Gaussian-pulse source
    float epsz;         // unused here: ga is set to 1.0 (free space) directly

    // file used to record the result
    FILE* fp;

    // allocate host memory for the matrices and arrays
    unsigned int mem_size_matrix = sizeof(float) * MATRIXSIZE;
    h_ga  = (float*) malloc(mem_size_matrix);
    h_dz  = (float*) malloc(mem_size_matrix);
    h_ez  = (float*) malloc(mem_size_matrix);
    h_hx  = (float*) malloc(mem_size_matrix);
    h_hy  = (float*) malloc(mem_size_matrix);
    h_ihx = (float*) malloc(mem_size_matrix);
    h_ihy = (float*) malloc(mem_size_matrix);

    unsigned int mem_size_array = sizeof(float) * XYLENGTH;
    h_gi2 = (float*) malloc(mem_size_array);
    h_gj2 = (float*) malloc(mem_size_array);
    h_gi3 = (float*) malloc(mem_size_array);
    h_gj3 = (float*) malloc(mem_size_array);
    h_fi1 = (float*) malloc(mem_size_array);
    h_fj1 = (float*) malloc(mem_size_array);
    h_fi2 = (float*) malloc(mem_size_array);
    h_fj2 = (float*) malloc(mem_size_array);
    h_fi3 = (float*) malloc(mem_size_array);
    h_fj3 = (float*) malloc(mem_size_array);

    // allocate device memory for the matrices and arrays
    cudaMalloc((void**) &d_ga,  mem_size_matrix);
    cudaMalloc((void**) &d_dz,  mem_size_matrix);
    cudaMalloc((void**) &d_ez,  mem_size_matrix);
    cudaMalloc((void**) &d_hx,  mem_size_matrix);
    cudaMalloc((void**) &d_hy,  mem_size_matrix);
    cudaMalloc((void**) &d_ihx, mem_size_matrix);
    cudaMalloc((void**) &d_ihy, mem_size_matrix);
    cudaMalloc((void**) &d_gi2, mem_size_array);
    cudaMalloc((void**) &d_gj2, mem_size_array);
    cudaMalloc((void**) &d_gi3, mem_size_array);
    cudaMalloc((void**) &d_gj3, mem_size_array);
    cudaMalloc((void**) &d_fi1, mem_size_array);
    cudaMalloc((void**) &d_fj1, mem_size_array);
    cudaMalloc((void**) &d_fi2, mem_size_array);
    cudaMalloc((void**) &d_fj2, mem_size_array);
    cudaMalloc((void**) &d_fi3, mem_size_array);
    cudaMalloc((void**) &d_fj3, mem_size_array);

    epsz = 8.8e-12;   // permittivity of free space (unused, see above)

    // initialize the matrices
    for (i = 0; i < XYLENGTH; i++) {
        for (j = 0; j < XYLENGTH; j++) {
            AR_INDEX(h_dz,  i, j) = 0.0f;
            AR_INDEX(h_ez,  i, j) = 0.0f;
            AR_INDEX(h_hx,  i, j) = 0.0f;
            AR_INDEX(h_hy,  i, j) = 0.0f;
            AR_INDEX(h_ihx, i, j) = 0.0f;
            AR_INDEX(h_ihy, i, j) = 0.0f;
            AR_INDEX(h_ga,  i, j) = 1.0f;   // free space
        } // end inner for loop
    } // end outer for loop

    // calculate the PML parameters
    // (the default initialization and the npml prompt were garbled in the
    //  source; reconstructed here as the usual free-space defaults)
    for (i = 0; i < XYLENGTH; i++) {
        h_gi2[i] = 1.0f;  h_gi3[i] = 1.0f;
        h_fi1[i] = 0.0f;  h_fi2[i] = 1.0f;  h_fi3[i] = 1.0f;
        h_gj2[i] = 1.0f;  h_gj3[i] = 1.0f;
        h_fj1[i] = 0.0f;  h_fj2[i] = 1.0f;  h_fj3[i] = 1.0f;
    }
    printf("npml --> ");
    scanf("%d", &npml);

    for (i = 0; i <= npml; i++) {
        xnum = npml - i;
        xd = npml;
        xxn = xnum / xd;
        xn = 0.33 * pow(xxn, 3);
        h_gi2[i] = 1.0 / (1.0 + xn);
        h_gi2[XYLENGTH - 1 - i] = 1.0 / (1.0 + xn);
        h_gi3[i] = (1.0 - xn) / (1.0 + xn);
        h_gi3[XYLENGTH - 1 - i] = (1.0 - xn) / (1.0 + xn);
        xxn = (xnum - 0.5) / xd;
        xn = 0.25 * pow(xxn, 3);
        h_fi1[i] = xn;
        h_fi1[XYLENGTH - 2 - i] = xn;
        h_fi2[i] = 1.0 / (1.0 + xn);
        h_fi2[XYLENGTH - 2 - i] = 1.0 / (1.0 + xn);
        h_fi3[i] = (1.0 - xn) / (1.0 + xn);
        h_fi3[XYLENGTH - 2 - i] = (1.0 - xn) / (1.0 + xn);
    }

    for (j = 0; j <= npml; j++) {
        xnum = npml - j;
        xd = npml;
        xxn = xnum / xd;
        xn = 0.33 * pow(xxn, 3);
        h_gj2[j] = 1.0 / (1.0 + xn);   // this line was missing in the original
        h_gj2[XYLENGTH - 1 - j] = 1.0 / (1.0 + xn);
        h_gj3[j] = (1.0 - xn) / (1.0 + xn);
        h_gj3[XYLENGTH - 1 - j] = (1.0 - xn) / (1.0 + xn);
        xxn = (xnum - 0.5) / xd;
        xn = 0.25 * pow(xxn, 3);
        h_fj1[j] = xn;
        h_fj1[XYLENGTH - 2 - j] = xn;
        h_fj2[j] = 1.0 / (1.0 + xn);
        h_fj2[XYLENGTH - 2 - j] = 1.0 / (1.0 + xn);
        h_fj3[j] = (1.0 - xn) / (1.0 + xn);
        h_fj3[XYLENGTH - 2 - j] = (1.0 - xn) / (1.0 + xn);
    }
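    // Note on the PML coefficients (added commentary): the loops above grade
    // an artificial loss into the outer npml cells with the cubic profile
    // xn = 0.33 * ((npml - i) / npml)^3, as in Sullivan-style 2-D FDTD codes.
    // Each gi2/gj2 factor, 1/(1+xn), attenuates the Dz flux update, and each
    // gi3/gj3 factor, (1-xn)/(1+xn), decays the previous Dz value; the
    // fi/fj arrays play the same roles for the H-field updates, evaluated at
    // the half-cell positions (hence the extra 0.5 offset in xxn), with
    // fi1/fj1 scaling the accumulated curl terms ihx/ihy. In the interior,
    // where the parameters keep their free-space defaults (1 for gi2/gi3/
    // fi2/fi3, 0 for fi1/fj1), the updates reduce to the ordinary FDTD
    // equations.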
    // simulation set-up
    t0 = 40.0f;
    spread = 15.0f;
    T = 0;
    nsteps = 1;

    // copy the matrices and arrays from host memory to device memory
    cudaMemcpy(d_dz,  h_dz,  mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_ez,  h_ez,  mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_hx,  h_hx,  mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_hy,  h_hy,  mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_ga,  h_ga,  mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_ihx, h_ihx, mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_ihy, h_ihy, mem_size_matrix, cudaMemcpyHostToDevice);
    cudaMemcpy(d_gi2, h_gi2, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_gj2, h_gj2, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_gi3, h_gi3, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_gj3, h_gj3, mem_size_array, cudaMemcpyHostToDevice); // the original copied d_gj2 twice and never d_gj3
    cudaMemcpy(d_fi1, h_fi1, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_fj1, h_fj1, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_fi2, h_fi2, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_fj2, h_fj2, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_fi3, h_fi3, mem_size_array, cudaMemcpyHostToDevice);
    cudaMemcpy(d_fj3, h_fj3, mem_size_array, cudaMemcpyHostToDevice);

    // kernel dimensions set-up
    dim3 dimBlock(BLOCK_LENGTH, BLOCK_LENGTH);
    dim3 dimGrid(XYLENGTH / BLOCK_LENGTH, XYLENGTH / BLOCK_LENGTH);

    // start simulation
    // (cudaThreadSynchronize() is deprecated; cudaDeviceSynchronize() is its
    //  replacement. The synchronizations are redundant here, since kernels on
    //  the default stream already run in order, but they are harmless.)
    while (nsteps > 0) {
        printf("nsteps -->");
        scanf("%d", &nsteps);
        printf("%d\n", nsteps);
        for (n = 1; n <= nsteps; n++) {
            T = T + 1;
            UpdateDZfield<<<dimGrid, dimBlock>>>(T, d_dz, d_hx, d_hy,
                                                 d_gi2, d_gj2, d_gi3, d_gj3);
            cudaDeviceSynchronize();
            UpdateEZfield<<<dimGrid, dimBlock>>>(d_ez, d_ga, d_dz);
            cudaDeviceSynchronize();
            UpdateHfield<<<dimGrid, dimBlock>>>(d_ez, d_hx, d_hy,
                                                d_ihx, d_ihy,
                                                d_fi1, d_fi2, d_fi3,
                                                d_fj1, d_fj2, d_fj3);
            cudaDeviceSynchronize();
        } // end for loop
    } // end while loop

    // copy d_ez back to h_ez and record it into a file
    cudaMemcpy(h_ez, d_ez, mem_size_matrix, cudaMemcpyDeviceToHost);
    //cudaMemcpy(h_hx, d_hx, mem_size_matrix, cudaMemcpyDeviceToHost);

    // write the Ez field out to the file "Ez.txt"
    fp = fopen("Ez.txt", "w");
    for (j = 0; j < XYLENGTH; j++) {
        for (i = 0; i < XYLENGTH; i++) {
            fprintf(fp, "%6.3f ", AR_INDEX(h_ez, i, j));
        } // end inner for loop
        fprintf(fp, " \n");
    } // end outer for loop

    /*
    // write the Hx field out to a file "Hx"
    fp = fopen("Hx", "w");
    for (j = 0; j < XYLENGTH; j++) {
        for (i = 0; i < XYLENGTH; i++) {
            fprintf(fp, "%6.3f ", AR_INDEX(h_hx, i, j));
        } // end inner for loop
        fprintf(fp, " \n");
    } // end outer for loop
    */

    // close the file
    fclose(fp);

    // free the host and device memory
    free(h_ez); free(h_dz); free(h_hx); free(h_hy);
    free(h_ihx); free(h_ihy); free(h_ga);
    free(h_gi2); free(h_gj2); free(h_gi3); free(h_gj3);
    free(h_fi1); free(h_fj1); free(h_fi2); free(h_fj2);
    free(h_fi3); free(h_fj3);
    cudaFree(d_ez); cudaFree(d_dz); cudaFree(d_hx); cudaFree(d_hy);
    cudaFree(d_ihx); cudaFree(d_ihy); cudaFree(d_ga);
    cudaFree(d_gi2); cudaFree(d_gj2); cudaFree(d_gi3); cudaFree(d_gj3); // the original freed only d_gi2 and d_gj3
    cudaFree(d_fi1); cudaFree(d_fj1); cudaFree(d_fi2); cudaFree(d_fj2);
    cudaFree(d_fi3); cudaFree(d_fj3);

    system("pause");   // Windows-only; remove on other platforms
    return 0;
}
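// Build and run (added note; assumes the listing is saved as fd2d_pml.cu and
// the CUDA toolkit's nvcc compiler is on the path):
//     nvcc -o fd2d_pml fd2d_pml.cu
//     ./fd2d_pml
// The program prompts for the number of PML cells, then repeatedly for
// nsteps (enter 0 to stop stepping); the final Ez field is written to
// Ez.txt as a 480 x 480 grid of values.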