Skip to content

Instantly share code, notes, and snippets.

@nomaddo
Last active October 17, 2019 15:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nomaddo/5991062 to your computer and use it in GitHub Desktop.
Save nomaddo/5991062 to your computer and use it in GitHub Desktop.
k近傍法のプログラム gcc k-nearest.c -lm でコンパイル
/*
k近傍法のプログラム
gcc k-nearest.c -lm でコンパイル
下に書いてあるアドレスからwine.dataをダウンロードして同じファイル内で使って下さい
a.out の引数にはKの値と、正規化するかどうか(0 or 1)の2つの引数を入力します。
k-nearest neighbor algorithm for wine.data in ( http://archive.ics.uci.edu/ml/machine-learning-databases/wine/ )
how to compile :
gcc k-nearest.c -lm
how to use:
1. Please download wine.data from upper URL and put it in the same folder of this source file.
2. Compile k-nearest.c
3. input "./a.out [k:ingeter] [0 or 1, which means a.out normalizes the data file or not]
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define SIZE 178
int exc[] = {0};
int lenExc = 1;
struct distance{
double dis;
int ord;
};
int compareDis (const void *a,const void *b)
{
return (int)(*(struct distance*)a).dis - (*(struct distance*)b).dis;
}
void normalize(double arr[][14])
{
int i,j;
double average[14];
double variance[14];
for(i=0;i<15;i++){
average[i] = 0.0;
variance[i] = 0.0;
}
/* 平均 */
for(i=0;i<SIZE;i++){
for(j=1;j<=13;j++){
average[j] += arr[i][j];
}
}
for(j=1;j<=13;j++) {
average[j] /= SIZE;
/* printf("%f\n",average[j]); */
}
/* 分散 */
for(i=1;i<=13;i++){
for(j=0;j<SIZE;j++){
variance[i] += pow(arr[j][i] - average[i],2.0);
}
variance[i] /= SIZE - 1;
/* printf("%lf\n",variance[i]); */
}
/* 正規化 */
for(i=1;i<=13;i++){
for(j=0;j<SIZE;j++){
arr[j][i] /= variance[i];
}
}
}
/* 0:false 1:true */
/* 特定の要素を距離の経産から除外したい時に使う */
int isIncludeExc(int x)
{
int i;
for(i=0;i<=lenExc;i++){
if (exc[i] == x) return 1;
}
return 0;
}
void print(double arr[])
{
int j;
printf("class:%d ",(int)(arr[0]));
for(j=1;j<=13;j++){
printf("%f ",arr[j]);
}
printf("\n");
}
void printArr(double arr[][14])
{
int i,j;
for(i=0;i<SIZE;i++){
printf("class:%d ",(int)(arr[i][0]));
for(j=1;j<=13;j++){
printf("%f ",arr[i][j]);
}
printf("\n");
}
}
double distance(double arr[][14], int x, int y)
{
int i;
double sum=0.0;
for(i=1;i<14;i++){
if (isIncludeExc(i)){/* printf("work"); */}
else {
/* print(arr[x]); print(arr[y]); */
sum += pow(arr[x][i] - arr[y][i],2.0);
}
}
/* printf("distance:%f\n",sum); */
return (sum);
}
int minIndex(double dis[],int len){
int i,index=-1;
double min = 10000000.0;
for(i=0;i<len;i++){
/* printf("x\n"); */
if (min > dis[i]){
min = dis[i];
index = i;
/* printf("now_min:%f %d\n",min,index); */
}
}
return(index);
}
int main(int argc, char **argv){
int i;
if(argv[1] == NULL || argv[2] == NULL) {
printf("usage:./a.out (k as ingeter) (isNormalize as 0 or 1)\n");
exit(1);
}
kmeans(atoi(argv[1]),atoi(argv[2]));
/* for(i=1;i<20;i++) kmeans(i); */
}
int kmeans(int k,int isNormalize)
{
int i,j;
int temp1,temp2;
int x1=0,x2=0,x3=0;
FILE *fp;
int success=0,fail=0;
int eachFail[6];
struct distance disArr[SIZE/2+1];
double arr[SIZE][14];
if ((fp = fopen("wine.data","r")) == NULL){
printf("fail open\n");
exit(EXIT_FAILURE);
}
/* init of the counters of fails */
for(i=0;i<=6;i++){
eachFail[i] = 0;
}
for(i=0;i<SIZE;i++){
fscanf(fp,"%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf",
&(arr[i][0]),
&(arr[i][1]),
&(arr[i][2]),
&(arr[i][3]),
&(arr[i][4]),
&(arr[i][5]),
&(arr[i][6]),
&(arr[i][7]),
&(arr[i][8]),
&(arr[i][9]),
&(arr[i][10]),
&(arr[i][11]),
&(arr[i][12]),
&(arr[i][13]));
}
/* printArr(arr); */
if(isNormalize) normalize(arr);
/* printArr(arr); */
/* Indexが 2n:標本データ、2n+1:テスト用データ */
for(i=0;i<SIZE/2;i++){
x1=0; x2=0; x3=0;
for(j=0;j<SIZE/2;j++){ disArr[j].dis = 10000000;}
for(j=0;j<SIZE/2;j++){
disArr[j].dis = distance(arr,2*i+1,2*j);
disArr[j].ord = 2 * j;
}
qsort(disArr,SIZE/2,sizeof(struct distance),compareDis);
for(j=0; j<k; j++){
switch((int)arr[disArr[j].ord][0]){
case 1: x1++;
break;
case 2: x2++;
break;
case 3: x3++;
break;
};
}
if(x1 >= x2 && x1 >= x3) temp1 = 1;
if(x2 > x1 && x2 >= x3) temp1 = 2;
if(x3 > x1 && x3 > x2) temp1 = 3;
temp2 = (int)arr[2*i+1][0];
if (temp1 == temp2) success++;
else {
fail++;
if(temp1==1 && temp2 == 2) eachFail[0]++;
if(temp1==1 && temp2 == 3) eachFail[1]++;
if(temp1==2 && temp2 == 1) eachFail[2]++;
if(temp1==2 && temp2 == 3) eachFail[3]++;
if(temp1==3 && temp2 == 1) eachFail[4]++;
if(temp1==3 && temp2 == 2) eachFail[5]++;
}
}
printf("success:%d,fail:%d,rate:%f\n",success,fail,(double)success/((double)(success+fail)));
printf("real class=1, output = 2: %d\n", eachFail[0]);
printf("real class=1, output = 3: %d\n", eachFail[1]);
printf("real class=2, output = 1: %d\n", eachFail[2]);
printf("real class=2, output = 3: %d\n", eachFail[3]);
printf("real class=3, output = 1: %d\n", eachFail[4]);
printf("real class=3, output = 2: %d\n", eachFail[5]);
/* printf("%f\n",distance(arr,0,1)); */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment