#include"cuda_runtime.h"
#include"device_launch_parameters.h"
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include<math.h>
// Dataset dimensions: `rows` is the number of CSV lines stored in `aa`,
// `cols` is the number of comma-separated integer values kept per line.
// NOTE(review): these are lower-case object-like macros, so any identifier
// spelled `rows` or `cols` anywhere after this point is textually replaced.
#define rows 4800
#define cols 128
// Host copy of the input table; main() fills it from the CSV file with atoi().
int aa[rows][cols];
// Host-side result buffer; main() copies the device result buffer into it.
double cc[rows][rows];
// Not referenced anywhere in the visible part of this file.
// Presumably reserved for a CPU reference result -- TODO confirm or remove.
double cpu_out[rows][rows];
// Number of nearest neighbours requested by the user (read with scanf in main()).
int k;
/*
 * distance -- pairwise Euclidean distances between the rows of dev_a.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per input row. Any block
 * size works; threads whose index is >= rows exit immediately.
 *
 * dev_a    : device pointer to rows*cols ints, row-major (row r starts at
 *            dev_a + r*cols).
 * dev_c    : device pointer to rows*rows doubles, row-major. On return
 *            dev_c[i*rows + j] holds the distance between rows i and j.
 * thPerblk : accepted for interface compatibility; not read (the block size
 *            is taken from blockDim).
 *
 * Each thread handles the pairs (row, other) with other > row, so every
 * unordered pair is computed by exactly one thread. That thread also writes
 * the mirrored entry and its own diagonal entry, so the whole matrix is
 * defined without two threads ever writing the same element.
 */
__global__ void distance(const int * dev_a, double * dev_c, dim3 thPerblk)
{
    (void)thPerblk;

    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= rows)
        return;

    const int *self = dev_a + (size_t)row * cols;

    /* Distance of a row to itself. */
    dev_c[(size_t)row * rows + row] = 0.0;

    for (int other = row + 1; other < rows; other++)
    {
        const int *peer = dev_a + (size_t)other * cols;

        /* The accumulator is per pair; it must start from zero each time. */
        double sum = 0.0;
        for (int c = 0; c < cols; c++)
        {
            /* Subtract and square in double so large inputs cannot overflow int. */
            const double diff = (double)self[c] - (double)peer[c];
            sum += diff * diff;
        }

        const double dist = sqrt(sum);
        dev_c[(size_t)row * rows + other] = dist;
        dev_c[(size_t)other * rows + row] = dist;
    }
}

/*
 * sorting -- element-wise compare-and-swap between row i (this thread's row)
 * and every row r of dev_c, using a row stride of cols.
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per row index; threads
 * whose index is >= rows exit immediately.
 *
 * dev_c    : device buffer modified in place. Elements dev_c[cols*r + c] and
 *            dev_c[cols*i + c] are swapped whenever the former is smaller.
 * sort     : accepted but not read or written by this kernel.
 * K        : accepted but not read by this kernel.
 * thPerblk : accepted but not read (the block size is taken from blockDim).
 *
 * NOTE(review): every thread walks all rows r and may write both row r and
 * row i, so different threads read and write the same elements with no
 * synchronisation; the final contents depend on scheduling. The kernel also
 * addresses dev_c with a stride of cols, whereas main() allocates it as
 * rows*rows doubles. The intended algorithm (presumably selecting the K
 * smallest distances per row into `sort`) cannot be determined from this
 * file -- TODO confirm and redesign with one owner thread per output row.
 */
__global__ void sorting(double *dev_c, double *sort, double K, dim3 thPerblk)
{
    (void)sort;
    (void)K;
    (void)thPerblk;

    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= rows)
        return;

    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < cols; c++)
        {
            if (dev_c[cols * r + c] < dev_c[cols * i + c])
            {
                /* Swap through a double: an int temporary would truncate. */
                const double temp = dev_c[cols * r + c];
                dev_c[cols * r + c] = dev_c[cols * i + c];
                dev_c[cols * i + c] = temp;
            }
        }
    }
}
/*
 * Program entry point.
 *
 * 1. Reads the neighbour count into the global `k`.
 * 2. Loads up to rows x cols integers from "test.csv" into the global `aa`.
 * 3. Copies `aa` to the device, launches the `distance` and `sorting`
 *    kernels, and copies the device result buffer back into the global `cc`.
 * 4. Prints the elapsed time and the result matrix, one matrix row per line.
 *
 * Returns 0 on success, 1 for invalid input / unreadable file / failed device
 * reset, and the failing cudaError_t value for any later CUDA error.
 */
int main()
{
    /* Everything a `goto Error` could otherwise jump over is declared here:
       C++ does not allow a jump to bypass an initialised declaration. */
    cudaError_t cudaStatus = cudaSuccess;
    int *d_a = 0;
    double *d_c = 0;
    double *sort = 0;
    int threads = 64;
    int blocks = 0;
    clock_t start = 0;
    FILE *myFile = NULL;
    /* Worst case per value is 11 characters plus a separator; 16 leaves slack
       so that a full line always fits in one fgets() call. */
    char buffer[cols * 16];
    char *record = NULL;
    int i = 0, j = 0;

    printf(" enter the number of K nearest neighbors :");
    if (scanf("%d", &k) != 1 || k < 0)
    {
        fprintf(stderr, "Invalid value for K\n");
        return 1;
    }

    myFile = fopen("test.csv", "r");
    if (myFile == NULL)
    {
        printf("Error Reading File\n");
        return 1;
    }

    /* Parse at most `rows` lines and at most `cols` values per line so that
       an oversized file can never write outside `aa`. */
    while (i < rows && fgets(buffer, sizeof(buffer), myFile) != NULL)
    {
        j = 0;
        record = strtok(buffer, " ,\r\n");
        while (record != NULL && j < cols)
        {
            aa[i][j] = atoi(record);
            j++;
            record = strtok(NULL, " ,\r\n");
        }
        i++;
    }
    fclose(myFile);
    if (i < rows)
    {
        /* `aa` is a zero-initialised global, so missing lines stay all-zero. */
        fprintf(stderr, "Warning: only %d of %d lines were read\n", i, (int)rows);
    }

    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceReset failed !");
        return 1;
    }
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaSetDevice failed ! Do you have a CUDA-capable GPU installed ?");
        goto Error;
    }
    else printf("Working \n");

    start = clock();

    cudaStatus = cudaMalloc((void **)&d_a, rows * cols * sizeof(int));
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaMalloc failed !");
        goto Error;
    }
    else printf(" Success ! ! ! \n");

    /* Element size is sizeof(int); sizeof(int *) would copy past the end of
       both `aa` and the device allocation on 64-bit targets. */
    cudaStatus = cudaMemcpy(d_a, aa, rows * cols * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaMemcpy failed !");
        goto Error;
    }
    else printf(" Success ! ! ! \n");

    cudaStatus = cudaMalloc((void **)&d_c, rows * rows * sizeof(double));
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaMalloc failed !");
        goto Error;
    }
    else printf(" Success ! ! ! \n");

    cudaStatus = cudaMalloc((void **)&sort, rows * rows * sizeof(double));
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaMalloc failed !");
        goto Error;
    }
    else printf(" Success ! ! ! \n");

    /* Smallest block size >= 64 that divides the row count exactly, so the
       grid covers every row with no partial block. */
    while (rows % threads != 0) threads++;
    printf("TH: %d \n", threads);
    blocks = rows / threads;

    distance<<<blocks, threads>>>(d_a, d_c, dim3(threads));
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "distance launch failed : %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    sorting<<<blocks, threads>>>(d_c, sort, k, dim3(threads));
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "sorting launch failed : %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    /* Kernel execution errors only surface at a synchronising call. */
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching the kernels !\n", cudaStatus);
        goto Error;
    }

    cudaStatus = cudaMemcpy(cc, d_c, rows * rows * sizeof(double), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess)
    {
        fprintf(stderr, "cudaMemcpy failed : %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    /* clock() counts in units of 1/CLOCKS_PER_SEC seconds. */
    printf("GPU Time Taken: %f \n", (double)(clock() - start) / CLOCKS_PER_SEC);

    /* `cc` is a 2-D array, so it is indexed as cc[i][j]; the matrix is
       printed once, one row per output line. */
    for (i = 0; i < rows; i++)
    {
        for (j = 0; j < rows; j++)
        {
            printf("%f \t ", cc[i][j]);
        }
        printf("\n");
    }

Error:
    /* cudaFree on a null pointer is a no-op, so this is safe on every path. */
    cudaFree(sort);
    cudaFree(d_c);
    cudaFree(d_a);
    return cudaStatus;
}