#ifdef _WIN32
define NOMINMAX
#endif
#define NA 2 /* æ•°æ®ç»´æ•° /
#define K 500 / èšç±»æ•° /
#define Psize 300000 / ç§ç¾¤å¤§å° /
#define T 1000000 / 最大è¿ä»£æ•° /
#define ED 0.00000001 / 结æŸæ¡ä»¶ */
// declaration, forward
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
// includes, project
#include <cutil.h>
// includes, kernels
#include <kmeans_kernel.cu>
typedef struct {
double p[NA];
double distance[K];
}Point;
typedef struct {
Point clu_cent[K]; /* å³cluster_center 簇类ä¸å¿ƒ /
int cluster[K][Psize]; / 簇类数组 /
int cluster_num[K]; / 簇类ä¸ä¸€ç»„æ•°æ®çš„ç¼–å· /
double fitness; / æ ·æœ¬é€‚åº”åº¦å€¼,用于判æ–结æŸæ¡ä»¶ /
double old_fitness; / å‰ä¸€æ¬¡è¿ä»£çš„适应度值 /
double Je; dim3 dimgrid((Psize - (Psize % 512)) / 512 + 1, K, 1);
dim3 dimblock( 512, 1, 1); / æ‰€æœ‰æ ·æœ¬çš„å¹³æ–¹è¯¯å·®å’Œ /
}Pop;
// functions
int Is_equal(int a[], int n, int b);
double Euclid(int x, int y);
void input_data();
void Init_center();
void calculate_distance();
void Make_new_cluster();
void Make_new_center();
void output_info(int flag);
void runTest( int argc, char* argv);
// array of structures
Point all_data[Psize];
Pop pop;
//input function
void input_data()
{
FILE *infile;
int i, j;
double data;
if((infile = fopen("test1.data", "r")) == NULL){
printf("file not found\n");
exit(1);
}
for(i = 0; i < Psize; i++)
for(j = 0; j < NA; j++){
fscanf(infile, "%lf", &data);
all_data[i].p[j] = data;
}
fclose(infile);
}
// intitialsatio of clusters
void Init_center()
{
int i, j;
int num = 0;
int rand_num;
int rand_num_tmp[K];
/* éšæœºäº§ç”Ÿä¸‰ä¸ª0~Psizeçš„æ•° /
while(num < K){
rand_num = rand() % Psize;
if(!Is_equal(rand_num_tmp, num, rand_num))
rand_num_tmp[num++] = rand_num;
}
for(i = 0; i < K; i++){
// printf(“åˆå§‹åŒ–质心%d:\n”, i + 1);
for(j = 0; j < NA; j++){
pop.clu_cent[i].p[j] = all_data[rand_num_tmp[i]].p[j];
// printf(“%lf “,pop.clu_cent[i].p[j]);
}
printf(”\n”);
}
}
// some function
int Is_equal(int a[], int n, int b)
{
int i;
for(i = 0; i < n; i++)
if(a[i] == b) return 1;
return 0;
}
/*******************************************
-
calculated Psize set of data to a center of mass of K * Euclidean distance
*******************************************/
void calculate_distance()
{
int i, j;
for(i = 0; i < Psize; i++)
for(j = 0; j < K; j++){
all_data[i].distance[j] = Euclid(i, j);
// printf(“%d—%d— %lf \n”, i, j, all_data[i].distance[j]);
}
}
///////eucludian distance calculation
double Euclid(int x, int y)
{
int i;
for(i = 0; i < NA; i++)
{
distance += pow((all_data.p[i] - pop.clu_cent[y].p[i]), 2);
}
distance = sqrt(distance);
return distance;
}
void Make_new_cluster()
{
int i, j;
double min;
for(i = 0; i < K; i++) /* åˆå§‹åŒ–ç¼–å· /
pop.cluster_num[i] = 0;
for(i = 0; i < Psize; i++){
int index = 0;
min = all_data[i].distance[0];
for(j = 1; j < K; j++){ / ç›é€‰åˆ°ç°‡å¿ƒæ¬§å‡ 里德最å°çš„ /
if(all_data[i].distance[j] < min){
min = all_data[i].distance[j];
index = j;
}
}
/ 划分簇集 /
pop.cluster[index][pop.cluster_num[index]++] = i;
}
/ è®¡ç®—æ‰€æœ‰æ ·æœ¬çš„å¹³æ–¹è¯¯å·®å’Œ /
pop.Je = 0;
for(i = 0; i < K; i++)
for(j = 0; j < pop.cluster_num[i]; j++){
/ æ ·æœ¬åˆ°ç°‡å¿ƒçš„å€¼å³ä¸ºå…¶æ¬§å‡ 里德è·ç¦» /
pop.Je +=pow(all_data[pop.cluster[i][j]].distance[i],2);
}
pop.old_fitness = pop.fitness; / å‰ä¸€æ¬¡è¿ä»£é€‚应度值 /
// printf(“old_fitness = %lf\n”, pop.old_fitness);
pop.fitness = pop.Je; / æ‰€æœ‰æ ·æœ¬å¹³æ–¹è¯¯å·®å’Œå³ä¸ºé€‚应度值 */
}
void Make_new_center()
{
int i, j, n;
double tmp_sum;
for(i = 0; i < K; i++)
for(j = 0; j < NA; j++){
tmp_sum = 0;
for(n = 0; n < pop.cluster_num[i]; n++){
/* 第i个簇的第j维数的所有数æ®å’Œ /
tmp_sum += all_data[pop.cluster[i][n]].p[j];
}
/ å–å¹³å‡æ•°å¾—到新的簇ä¸å¿ƒ /nput_data(); / å¯¼å…¥æ•°æ® /
float ctimer1= clock();
Init_center(); / åˆå§‹åŒ–质心 */
float ctimer2= clock();
float ctimer7;
pop.clu_cent[i].p[j] = tmp_sum / pop.cluster_num[i];
}
}
void output_info(int flag)
{
int i, j, n;
for(i = 0; i < K; i++){
if(flag == 0){
printf("åˆå§‹åŒ–质心%d:\n", i + 1);
for(n = 0; n < NA; n++)
printf("%lf ",pop.clu_cent[i].p[n]);
} else if(flag == 1){
printf("最终质心%d:\n", i + 1);
for(n = 0; n < NA; n++)
printf("%lf ",pop.clu_cent[i].p[n]);
}
printf("\n簇类%d:\n", i + 1);
for(j = 0; j < pop.cluster_num[i]; j++){
for(n = 0; n < NA; n++){
printf("%lf ",
all_data[pop.cluster[i][j]].p[n]);
}
printf("\n");
}
}
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
/********************************
******************************/
main()
{
float start = clock();
float ctimer3;
float ctimer4;
float ctimer5;
int i;
double differ = 1; / 适应度差值 /
int flag = 0; / 用æ¥æ ‡ç¤ºæ˜¯æ˜¾ç¤ºåˆå§‹æ•°æ®è¿˜æ˜¯èšç±»åŽçš„æ•°æ® /
input_data(); / å¯¼å…¥æ•°æ® /
float ctimer1= clock();
Init_center(); / åˆå§‹åŒ–质心 /
float ctimer2= clock();
float ctimer7;
int chunk = CHUNKSIZE;
//int tid = omp_get_thread_num();
//int num_threads = 16;
/ for(int a=0;a<Psize;a++)
for(int b=0; b<NA ;b++)
double Pcopy[a][bForum]=all_data[a].p[b];
for(int c=0;c<Psize;c++)
for(int d=0; d<NA ;d++)
double dcopy[c][d]=all_data[a].distance[b]; /
//#pragma omp for schedule (static, chunk)
for(i = 0; (i < T) && (differ > ED); i++)
{
float ctimer7= clock();
calculate_distance(); / è®¡ç®—æ¬§å‡ é‡Œå¾·è·ç¦» */
ctimer3= clock();
Make_new_cluster(); /* 产生新的èšç±» */
ctimer4 = clock();
if(flag == 0)
{
output_info(flag); /* 输出第一次èšç±»åŽçš„结果 */
flag = 1;
}
Make_new_center(); /* 对新的èšç±»äº§ç”Ÿæ–°çš„质心 */
ctimer5= clock();
differ = pop.old_fitness - pop.fitness; /* 判æ–æ¡ä»¶ */
differ = fabs(differ);
}
printf("+++++++++++++++++++++++++++++++++++++++++++++++++++++++\n");
float ctimer6;
ctimer6 = clock();
output_info(flag); /* èšç±»åŽæ˜¾ç¤ºç»“æžœ */
float end= clock();
printf("%f time taken in seconds for input_data\n",(ctimer1-start)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for init centre\n",(ctimer2-ctimer1)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for calculate distance\n",(ctimer3-ctimer7)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for make new cluster\n",(ctimer4-ctimer3)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for make new center\n",(ctimer5-ctimer4)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for output_info\n",(-ctimer6+end)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for the for loop\n",(ctimer6-ctimer2)/CLOCKS_PER_SEC);
printf("%f time taken in seconds for total program\n",(end-start)/CLOCKS_PER_SEC);
return 0;
}
void runTest( int argc, char** argv)
{
CUT_DEVICE_INIT();
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
int m1 = sizeof( float)(Psize * NA);//simplified memory access
int m2 = sizeof( float)(K * NA);//simplified memory access
int m3 = sizeof( float)*(Psize * K);//simplified memory access
float* h_idata = (float*) malloc( m1);
float* h_idata2 = (float*) malloc( m2);
for( unsigned int i = 0; i < Psize; i++)
for( unsigned int j = 0; j < NA; j++)
{
h_idata[i][j]=all_data[i].p[j];//host data points
}
for( unsigned int i = 0; i < K; i++)
for( unsigned int j = 0; j < NA; j++)
{
h_idata2[i][j]=pop.clu_cent[i].p[j];//cluster centres
}
double* d_idata;
double* d_idata2;
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_idata, m1));//allocating memory
CUDA_SAFE_CALL( cudaMemcpy( d_idata, h_idata, m1,cudaMemcpyHostToDevice) );//points copying
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_data2, m2));//allocating memory
CUDA_SAFE_CALL( cudaMemcpy( d_idata2, h_idata2, m2,cudaMemcpyHostToDevice) );//clusters copying
float *d_odata;//output data declaration
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_odata, m3));
/*
* how to declare number of threads and blocks lets dicuss
*/
kmeans_kernel<<< num_threads,num_blocks>>>( d_idata, d_idata2, d_odata,Psize,K,NA);//kernel declaration
CUT_CHECK_ERROR("Kernel execution failed");
float* h_odata = (float*) malloc( m3);
CUDA_SAFE_CALL( cudaMemcpy( h_odata, d_odata, m3, cudaMemcpyDeviceToHost) );//copying back memory
CUT_SAFE_CALL( cutStopTimer( timer));
printf( "Processing time: %f (ms)\n", cutGetTimerValue( timer));
CUT_SAFE_CALL( cutDeleteTimer( timer));
free( h_idata);
free( h_odata);
CUDA_SAFE_CALL(cudaFree(d_idata));
CUDA_SAFE_CALL(cudaFree(d_odata));
}