#include <stdlib.h>
#include <stdio.h>
#include<math.h>
#include<cuda.h>
#include<time.h>
#define MAX_CITIES 280
#define MAX_ANT 2
#define BETA 5.0
#define RHO 0.5
#define gpuErrchk(ans) { gpuAssert((ans), FILE, LINE); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,“GPUassert: %s %s %d\n”, cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
struct antType{
int curCity;
int visited[MAX_CITIES];
int path[MAX_CITIES];
float tourLength;
};
//to calculate square
device float sqr(float x)
{
return x*x;
}
device float distance(int i,int j,int *a_d, int *b_d)
{
return (sqrt(sqr(a_d[i]-a_d[j])+sqr(b_d[i]-b_d[j])));
}
//calculate distance between cities and pheromone
global void calc_dist(float *dev_dist,int *a_d, int *b_d,float *dev_phero)
{
int index_x = blockIdx.x * blockDim.x + threadIdx.x;
int index_y = blockIdx.y * blockDim.y + threadIdx.y;
int grid_width = gridDim.x * blockDim.x;
int index = index_y * grid_width + index_x;
dev_dist[index]=distance(index_x,index_y,a_d,b_d);
dev_phero[index]=(1.0/MAX_CITIES);
}
device int isfree(int ant,struct antType *dev_ant)
{
int i;
for(i=0;i<MAX_CITIES;i++)
{
if(dev_ant[ant].visited[i]==0)
return 1;
}
return 0;
}
device int selectNextCity(int start,int ant,float *dev_dist,float *dev_phero,struct antType dev_ant)
{
dev_ant[ant].visited[start]=1;
int c=start;
float max=0.0;
float num_value,v,v2;
int i,next,l;
int index1,index2;
float power,power2;
for(i=0;i<MAX_CITIES;i++)
{
if(i!=c&&dev_ant[ant].visited[i]==0)
{
index1=(cMAX_CITIES+i);
v=1/dev_dist[index1];
power=(float)pow((double)v,BETA);
num_value=dev_phero[index1]power;
float den_value=0.0;
for(l=0;l<MAX_CITIES;l++)
{
index2=cMAX_CITIES+l;
if(l!=i&&l!=c)
{
v2=1/dev_dist[index2];
power2=(float)pow((double)v2,BETA);
den_value+=(dev_phero[index2]*power2);
}
}
if((num_value/den_value)>max)
{
max=num_value/den_value;
next=i;
}
}
}
return next;
}
global void simulatedev_ant(struct antType *dev_ant,float *dev_phero,float *dev_dist)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
int next;
float value=(1.0 - RHO);
int k=0;
dev_ant[tid].path[0]=dev_ant[tid].curCity;
int start=dev_ant[tid].curCity;
int remaining=MAX_CITIES-1;
while((remaining–)>0)
{
next=selectNextCity(start,tid,dev_dist,dev_phero,dev_ant);
dev_ant[tid].visited[next]=1;
int index = startMAX_CITIES+next;
dev_ant[tid].path[++k]=next;
dev_ant[tid].tourLength +=dev_dist[index];
dev_phero[index]=valuedev_phero[index];
start=next;
}
}
int main(void)
{
cudaFree(0);
int num_elements_x = 32;
int num_elements_y = 32;
int sno[MAX_CITIES];
int num_bytes = num_elements_x * num_elements_y * sizeof(float);
float *prob;
float *dist,*dev_dist;
float *phero,*dev_phero;
clock_t begin,end;
float timeSpent;
float optimal[MAX_ANT];
begin=clock();
dist = (float*)malloc(num_bytes);
gpuErrchk(cudaMalloc((void**)&dev_dist, num_bytes));
phero = (float*)malloc(num_bytes);
gpuErrchk(cudaMalloc((void**)&dev_phero, num_bytes));
prob = (float*)malloc(num_bytes);
int *a_h,*b_h,*a_d,*b_d;
FILE *f1;
int n=0;
a_h = (int *)malloc(sizeof(int)*MAX_CITIES);
b_h = (int *)malloc(sizeof(int)*MAX_CITIES);
f1=fopen(“co_32.txt”,“r”);// input file
while(!feof(f1))
{
fscanf(f1,“%d”,&a_h[n]);
fscanf(f1,“%d”,&b_h[n]);
n++;
}
fclose(f1);
struct antType *dev_ant,host_ant;
int dev_ant_size=MAX_ANTsizeof(struct antType);
printf(“\nsize%d\n”,dev_ant_size);
host_ant=(struct antType*)malloc(MAX_ANT*sizeof(struct antType));
gpuErrchk(cudaMalloc((void **) &dev_ant, dev_ant_size));
int i,j;
for(i=0;i<MAX_ANT;i++)
{
host_ant[i].curCity=rand()%MAX_CITIES;
host_ant[i].tourLength=0.0;
for(j=0;j<MAX_CITIES;j++)
{
host_ant[i].visited[j]=0;
host_ant[i].path[j]=-1;
}
}
gpuErrchk(cudaMalloc((void **) &a_d, sizeof(int)*MAX_CITIES));
gpuErrchk(cudaMalloc((void **) &b_d, sizeof(int)*MAX_CITIES));
gpuErrchk(cudaMemcpy(a_d, a_h, sizeof(int)*MAX_CITIES, cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(b_d, b_h, sizeof(int)*MAX_CITIES, cudaMemcpyHostToDevice));
dim3 block_size;
block_size.x = 8;
block_size.y = 4;
dim3 grid_size;
grid_size.x = num_elements_x / block_size.x;
grid_size.y = num_elements_y / block_size.y;
calc_dist<<<grid_size,block_size>>>(dev_dist,a_d,b_d,dev_phero);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk(cudaMemcpy(dev_ant,host_ant,dev_ant_size,cudaMemcpyHostToDevice));
simulatedev_ant<<<1,MAX_ANT>>>(dev_ant,dev_phero,dev_dist);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk(cudaMemcpy(host_ant,dev_ant,dev_ant_size,cudaMemcpyDeviceToHost));
printf(“\n\n”);
for(i=0;i<MAX_ANT;i++)
{
printf(“ANT %d details\n\n”,i+1);
optimal[i]=host_ant[i].tourLength;
printf(“%f”,host_ant[i].tourLength);
printf(“\nPath\n”);
for(j=0;j<MAX_CITIES;j++)
printf(“%d\t”,host_ant[i].path[j]);
printf(“\n”);
}
float smallest = optimal[0];
for (i = 0; i < MAX_ANT; i++)
{
if (optimal[i] < smallest) {
smallest = optimal[i];
}
}
printf(“\nSmallest Element : %f”, smallest);
end=clock();
timeSpent=(double)((end-begin)/(CLOCKS_PER_SEC/1000));
printf(“Time spent=%lf\n”,timeSpent);
// deallocate memory
free(dist);
free(host_ant);
cudaFree(dev_ant);
cudaFree(dev_dist);
free(phero);
cudaFree(dev_phero);
}