Knn algorithm. Can someone help me with the errors.

#include"cuda_runtime.h"
#include"device_launch_parameters.h"
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include<math.h>
#define rows 4800
#define cols 128

int aa[rows][cols];
double cc[rows][rows];
double cpu_out[rows][rows];
int k;

global void distance(const int * dev_a, double * dev_c, dim3 thPerblk)
{
int th_num = blockIdx.x64 + threadIdx.x;
double sum = 0;
for(int k = th_num + 1;k<rows;k++)
{
for(int c = 0;c < cols; c++)
sum += (dev_a[th_num
cols + c] − dev_a[kcols + c])(dev_a[th_numcols + c] − dev_a[kcols + c]);

		dev_c[th_num] = sqrt(sum);
}
// printf ("Sum: %d \t " , sum);

} global void sorting(double *dev_c, double *sort,double K, dim3 thPerblk)

{
int temp; int i;
i = blockIdx.x * 64 + threadIdx.x;
for(int r = 0; r < rows; r++)
{
for(int c = 0;c < cols;c++)
{
if(dev_c[colsr + c]<dev_c[cols i + c])

		{
			temp = dev_c[cols*r + c]; 
			dev_c[cols*r + c] = dev_c[cols* i + c]; 
			dev_c[cols* i + c] = temp; 
		}
	}
}

}
int main()
{

printf(" enter the number of K nearest neighbors :");
scanf("%d",&k);
FILE *myFile;
myFile = fopen(" test.csv ", "r");

if (myFile == NULL)

{
	printf("Error Reading File\n");
	exit(0);
}

char buffer[1024];
int i = 0, j = 0;
char *record, *line;

while ((line = fgets(buffer, sizeof(buffer), myFile)) != NULL)

{
	j = 0;
	record = strtok(line, " ,");
	while (record != NULL)
	{
		// printf("%d \t %d \t %d \n" , ( cols * i ) + j , i , j ); 
		aa[i][j] = atoi(record);
		record = strtok(NULL, " ,");
		j++;
	}
	i++;
}

fclose(myFile);
cudaError_t cudaStatus;
cudaStatus = cudaDeviceReset();

if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaDeviceReset failed !");
	return 1;
}
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaSetDevice failed ! Do you have a CUDA−capable GPU installed ?");
	goto Error;
}
else printf("Working \n");

clock_t start; start = clock();
int *d_a = 0;
cudaStatus = cudaMalloc((void **)&d_a, rows*cols * sizeof(int));
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaMalloc failed !"); goto Error;
}

else
	printf(" Success ! ! ! \n");
cudaStatus = cudaMemcpy(d_a, aa, rows*cols * sizeof(int *), cudaMemcpyHostToDevice);

if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaMemcpy failed !");
	goto Error;
}

else printf(" Success ! ! ! \n");
double *d_c = 0;
cudaStatus = cudaMalloc((void **)&d_c, rows* rows * sizeof(double));
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaMalloc failed !");
	goto Error;
}
else printf(" Success ! ! ! \n");
double *sort = 0; cudaStatus = cudaMalloc((void **)&sort, rows* rows * sizeof(double));
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaMalloc failed !");
	goto Error;
}
else printf(" Success ! ! ! \n");
int threads = 64;
while (rows%threads != 0) threads++;
printf("TH: %d \n", threads);
//return 0;
dim3 threadsPerBlock(threads);
dim3 numBlocks(rows / threadsPerBlock.x);

distance << <numBlocks, threadsPerBlock >> > (d_a, d_c);
sorting << <numBlocks, threadsPerBlock >> > (sort, K);

cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "addKern launch failed : %s\n", cudaGetErrorString(cudaStatus));
	goto Error;
}
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel !\n", cudaStatus);
	goto Error;
}
//return cudaStatus ; 
cudaStatus = cudaMemcpy(cc, d_c, rows*rows * sizeof(double), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)
{
	fprintf(stderr, "addKernel launch failed : %s\n", cudaGetErrorString(cudaStatus));
	goto Error;
}
for (int i = 0; i <= K; i++)
{
}
printf("GPU Time Taken: %f \n", (double)(clock() − start) / CLK_TCK);
for (int l = 0; l <= K; l++)
{
	for (i = 0; i < rows; i++)
	{
		for (int j = 0; j < rows; j++)
		{
			printf("%f \t ", cc[(rows * i) + j]);
		}
	}
}

Error:
// printf (" Exiting . . \n");
cudaFree(d_c);
cudaFree(d_a);
return cudaStatus;
}

Use proper CUDA error checking:
[url]Google

That should help to figure out the issues.

The error checking appears to be pretty much correct to me.

since the code requires both user input as well as input files, it seems that there is not enough information to run it.

Being unable to run it, and with no description of what the errors are, it may be difficult to provide help.

Also, if you are having kernel execution issues, you may want to follow the procedure outlined here:

[url]cuda - Unspecified launch failure on Memcpy - Stack Overflow

to narrow it to a specific line of kernel code. At that point, in-kernel printf or other debugging techniques may be used if additional clarity is needed.