Last active
October 17, 2019 15:36
-
-
Save nomaddo/5991062 to your computer and use it in GitHub Desktop.
k近傍法のプログラム
gcc k-nearest.c -lm でコンパイル
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
k近傍法のプログラム | |
gcc k-nearest.c -lm でコンパイル | |
下に書いてあるアドレスからwine.dataをダウンロードして同じファイル内で使って下さい | |
a.out の引数にはKの値と、正規化するかどうか(0 or 1)の2つの引数を入力します。 | |
k-nearest neighbor algorithm for wine.data in ( http://archive.ics.uci.edu/ml/machine-learning-databases/wine/ ) | |
how to compile : | |
gcc k-nearest.c -lm | |
how to use: | |
1. Please download wine.data from upper URL and put it in the same folder of this source file. | |
2. Compile k-nearest.c | |
3. input "./a.out [k:ingeter] [0 or 1, which means a.out normalizes the data file or not] | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#define SIZE 178 | |
int exc[] = {0}; | |
int lenExc = 1; | |
struct distance{ | |
double dis; | |
int ord; | |
}; | |
int compareDis (const void *a,const void *b) | |
{ | |
return (int)(*(struct distance*)a).dis - (*(struct distance*)b).dis; | |
} | |
void normalize(double arr[][14]) | |
{ | |
int i,j; | |
double average[14]; | |
double variance[14]; | |
for(i=0;i<15;i++){ | |
average[i] = 0.0; | |
variance[i] = 0.0; | |
} | |
/* 平均 */ | |
for(i=0;i<SIZE;i++){ | |
for(j=1;j<=13;j++){ | |
average[j] += arr[i][j]; | |
} | |
} | |
for(j=1;j<=13;j++) { | |
average[j] /= SIZE; | |
/* printf("%f\n",average[j]); */ | |
} | |
/* 分散 */ | |
for(i=1;i<=13;i++){ | |
for(j=0;j<SIZE;j++){ | |
variance[i] += pow(arr[j][i] - average[i],2.0); | |
} | |
variance[i] /= SIZE - 1; | |
/* printf("%lf\n",variance[i]); */ | |
} | |
/* 正規化 */ | |
for(i=1;i<=13;i++){ | |
for(j=0;j<SIZE;j++){ | |
arr[j][i] /= variance[i]; | |
} | |
} | |
} | |
/* 0:false 1:true */ | |
/* 特定の要素を距離の経産から除外したい時に使う */ | |
int isIncludeExc(int x) | |
{ | |
int i; | |
for(i=0;i<=lenExc;i++){ | |
if (exc[i] == x) return 1; | |
} | |
return 0; | |
} | |
void print(double arr[]) | |
{ | |
int j; | |
printf("class:%d ",(int)(arr[0])); | |
for(j=1;j<=13;j++){ | |
printf("%f ",arr[j]); | |
} | |
printf("\n"); | |
} | |
void printArr(double arr[][14]) | |
{ | |
int i,j; | |
for(i=0;i<SIZE;i++){ | |
printf("class:%d ",(int)(arr[i][0])); | |
for(j=1;j<=13;j++){ | |
printf("%f ",arr[i][j]); | |
} | |
printf("\n"); | |
} | |
} | |
double distance(double arr[][14], int x, int y) | |
{ | |
int i; | |
double sum=0.0; | |
for(i=1;i<14;i++){ | |
if (isIncludeExc(i)){/* printf("work"); */} | |
else { | |
/* print(arr[x]); print(arr[y]); */ | |
sum += pow(arr[x][i] - arr[y][i],2.0); | |
} | |
} | |
/* printf("distance:%f\n",sum); */ | |
return (sum); | |
} | |
int minIndex(double dis[],int len){ | |
int i,index=-1; | |
double min = 10000000.0; | |
for(i=0;i<len;i++){ | |
/* printf("x\n"); */ | |
if (min > dis[i]){ | |
min = dis[i]; | |
index = i; | |
/* printf("now_min:%f %d\n",min,index); */ | |
} | |
} | |
return(index); | |
} | |
int main(int argc, char **argv){ | |
int i; | |
if(argv[1] == NULL || argv[2] == NULL) { | |
printf("usage:./a.out (k as ingeter) (isNormalize as 0 or 1)\n"); | |
exit(1); | |
} | |
kmeans(atoi(argv[1]),atoi(argv[2])); | |
/* for(i=1;i<20;i++) kmeans(i); */ | |
} | |
int kmeans(int k,int isNormalize) | |
{ | |
int i,j; | |
int temp1,temp2; | |
int x1=0,x2=0,x3=0; | |
FILE *fp; | |
int success=0,fail=0; | |
int eachFail[6]; | |
struct distance disArr[SIZE/2+1]; | |
double arr[SIZE][14]; | |
if ((fp = fopen("wine.data","r")) == NULL){ | |
printf("fail open\n"); | |
exit(EXIT_FAILURE); | |
} | |
/* init of the counters of fails */ | |
for(i=0;i<=6;i++){ | |
eachFail[i] = 0; | |
} | |
for(i=0;i<SIZE;i++){ | |
fscanf(fp,"%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf", | |
&(arr[i][0]), | |
&(arr[i][1]), | |
&(arr[i][2]), | |
&(arr[i][3]), | |
&(arr[i][4]), | |
&(arr[i][5]), | |
&(arr[i][6]), | |
&(arr[i][7]), | |
&(arr[i][8]), | |
&(arr[i][9]), | |
&(arr[i][10]), | |
&(arr[i][11]), | |
&(arr[i][12]), | |
&(arr[i][13])); | |
} | |
/* printArr(arr); */ | |
if(isNormalize) normalize(arr); | |
/* printArr(arr); */ | |
/* Indexが 2n:標本データ、2n+1:テスト用データ */ | |
for(i=0;i<SIZE/2;i++){ | |
x1=0; x2=0; x3=0; | |
for(j=0;j<SIZE/2;j++){ disArr[j].dis = 10000000;} | |
for(j=0;j<SIZE/2;j++){ | |
disArr[j].dis = distance(arr,2*i+1,2*j); | |
disArr[j].ord = 2 * j; | |
} | |
qsort(disArr,SIZE/2,sizeof(struct distance),compareDis); | |
for(j=0; j<k; j++){ | |
switch((int)arr[disArr[j].ord][0]){ | |
case 1: x1++; | |
break; | |
case 2: x2++; | |
break; | |
case 3: x3++; | |
break; | |
}; | |
} | |
if(x1 >= x2 && x1 >= x3) temp1 = 1; | |
if(x2 > x1 && x2 >= x3) temp1 = 2; | |
if(x3 > x1 && x3 > x2) temp1 = 3; | |
temp2 = (int)arr[2*i+1][0]; | |
if (temp1 == temp2) success++; | |
else { | |
fail++; | |
if(temp1==1 && temp2 == 2) eachFail[0]++; | |
if(temp1==1 && temp2 == 3) eachFail[1]++; | |
if(temp1==2 && temp2 == 1) eachFail[2]++; | |
if(temp1==2 && temp2 == 3) eachFail[3]++; | |
if(temp1==3 && temp2 == 1) eachFail[4]++; | |
if(temp1==3 && temp2 == 2) eachFail[5]++; | |
} | |
} | |
printf("success:%d,fail:%d,rate:%f\n",success,fail,(double)success/((double)(success+fail))); | |
printf("real class=1, output = 2: %d\n", eachFail[0]); | |
printf("real class=1, output = 3: %d\n", eachFail[1]); | |
printf("real class=2, output = 1: %d\n", eachFail[2]); | |
printf("real class=2, output = 3: %d\n", eachFail[3]); | |
printf("real class=3, output = 1: %d\n", eachFail[4]); | |
printf("real class=3, output = 2: %d\n", eachFail[5]); | |
/* printf("%f\n",distance(arr,0,1)); */ | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment