Skip to content

Instantly share code, notes, and snippets.

@hiromu
Created August 29, 2014 02:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hiromu/d5ed079e98caea3b008c to your computer and use it in GitHub Desktop.
Save hiromu/d5ed079e98caea3b008c to your computer and use it in GitHub Desktop.
機械学習コンテストサンプルソース
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* Number of rows read from train.csv. */
#define TRAIN_SET 500
/* Number of rows read from test.csv. */
#define TEST_SET 500
/* Number of feature columns in each row. */
#define FEATURE 6
/* Number of nearest neighbours that vote in knn(). */
#define K 3
/* Number of iterations of the random weight search in main(). */
#define SA 10000
/* The first TRAIN_SET / FOLD training rows are used for fitting; the
 * remaining rows are used to score a candidate weight vector. */
#define FOLD 2
/* Exclusive upper bound of the random step applied to one weight per
 * search iteration. */
#define RANGE 10
/* Training features and labels, test features, and predicted labels.
 * result also receives the held-out predictions during the weight search. */
int train[TRAIN_SET][FEATURE], label[TRAIN_SET], test[TEST_SET][FEATURE], result[TEST_SET];
/**
 * Weighted k-nearest-neighbour classifier.
 *
 * For every row of test_set, finds the (up to) K rows of train_set with the
 * smallest weighted Euclidean distance
 *     sqrt(sum over k of weight[k] * (train_set[j][k] - test_set[i][k])^2)
 * and stores the rounded mean of their labels in test_label[i].
 *
 * @param train_set    training rows, FEATURE columns each
 * @param train_label  label of each training row (train_size entries)
 * @param train_size   number of training rows
 * @param test_set     rows to classify, FEATURE columns each
 * @param test_label   output: predicted label of each test row
 * @param test_size    number of rows in test_set
 * @param weight       per-feature weight (FEATURE entries)
 *
 * If no training row yields a distance below DBL_MAX (for example when
 * train_size is 0), the predicted label is 0.
 */
void knn(int train_set[][FEATURE], int train_label[], int train_size, int test_set[][FEATURE], int test_label[], int test_size, int weight[])
{
    int i, j, k, slot, sum, count, neighbors[K];
    double delta, distance, dists[K];

    for (i = 0; i < test_size; i++) {
        /* dists[] is kept sorted ascending; DBL_MAX marks an empty slot. */
        for (j = 0; j < K; j++) {
            dists[j] = DBL_MAX;
            neighbors[j] = 0;
        }

        for (j = 0; j < train_size; j++) {
            distance = 0;
            for (k = 0; k < FEATURE; k++) {
                delta = (double)train_set[j][k] - test_set[i][k];
                distance += delta * delta * weight[k];
            }
            distance = sqrt(distance);

            /* Find the first slot holding a strictly larger distance.
             * Strict comparison keeps the earlier training row on ties. */
            for (slot = 0; slot < K && !(distance < dists[slot]); slot++) {
                ;
            }
            if (slot == K) {
                continue; /* not among the K closest seen so far */
            }

            /* Shift the tail down by one so that only the farthest
             * neighbour is dropped, then insert the new one. */
            for (k = K - 1; k > slot; k--) {
                dists[k] = dists[k - 1];
                neighbors[k] = neighbors[k - 1];
            }
            dists[slot] = distance;
            neighbors[slot] = train_label[j];
        }

        /* Vote: rounded mean of the labels in the filled slots. */
        sum = 0;
        count = 0;
        for (j = 0; j < K; j++) {
            if (dists[j] != DBL_MAX) {
                sum += neighbors[j];
                count += 1;
            }
        }
        /* Avoid 0/0 (NaN converted to int is undefined) when nothing matched. */
        test_label[i] = (count > 0) ? (int)round((double)sum / count) : 0;
    }
}
/**
 * Tunes per-feature weights for knn() and prints predictions for test.csv.
 *
 * Steps:
 *  1. Read TRAIN_SET rows (FEATURE comma-separated integers plus a label)
 *     from train.csv.
 *  2. Score the initial all-ones weight vector: fit on the first
 *     TRAIN_SET / FOLD rows and sum the absolute label errors on the rest.
 *  3. Run SA rounds of greedy random search: perturb one random weight by a
 *     random step in [0, RANGE) and keep the change unless the held-out
 *     error gets worse.
 *  4. Read TEST_SET rows (FEATURE comma-separated integers) from test.csv,
 *     classify them with the full training set, and print one label per line.
 *
 * @return 0 on success, 1 if an input file cannot be opened or parsed.
 */
int main(void)
{
    int i, j, idx, range, old, diff, sum, weight[FEATURE];
    const int fit_size = TRAIN_SET / FOLD;
    const int val_size = TRAIN_SET - fit_size;

    /* "= {1}" would set only weight[0] to 1 and zero the rest, so every
     * feature is initialised explicitly. */
    for (i = 0; i < FEATURE; i++) {
        weight[i] = 1;
    }

    if (freopen("train.csv", "r", stdin) == NULL) {
        perror("train.csv");
        return 1;
    }
    for (i = 0; i < TRAIN_SET; i++) {
        for (j = 0; j < FEATURE; j++) {
            if (scanf("%d,", &train[i][j]) != 1) {
                fprintf(stderr, "train.csv: cannot parse row %d\n", i + 1);
                return 1;
            }
        }
        if (scanf("%d", &label[i]) != 1) {
            fprintf(stderr, "train.csv: cannot parse label of row %d\n", i + 1);
            return 1;
        }
    }

    /* Baseline held-out error for the initial weights. */
    knn(train, label, fit_size, &train[fit_size], result, val_size, weight);
    diff = 0;
    for (i = 0; i < val_size; i++) {
        diff += abs(label[fit_size + i] - result[i]);
    }

    srand((unsigned)time(NULL));
    for (i = 0; i < SA; i++) {
        idx = rand() % FEATURE;
        range = rand() % RANGE;
        old = weight[idx];
        /* Never step below zero: a weight smaller than the step always grows. */
        if (weight[idx] < range || rand() % 2 == 0) {
            weight[idx] += range;
        } else {
            weight[idx] -= range;
        }

        knn(train, label, fit_size, &train[fit_size], result, val_size, weight);
        sum = 0;
        for (j = 0; j < val_size; j++) {
            sum += abs(label[fit_size + j] - result[j]);
        }

        if (sum > diff) {
            weight[idx] = old;  /* worse: undo the step */
        } else {
            diff = sum;         /* accepted: this is the new error to beat */
        }
    }

    if (freopen("test.csv", "r", stdin) == NULL) {
        perror("test.csv");
        return 1;
    }
    for (i = 0; i < TEST_SET; i++) {
        for (j = 0; j < FEATURE - 1; j++) {
            if (scanf("%d,", &test[i][j]) != 1) {
                fprintf(stderr, "test.csv: cannot parse row %d\n", i + 1);
                return 1;
            }
        }
        /* The last column has no trailing comma. */
        if (scanf("%d", &test[i][j]) != 1) {
            fprintf(stderr, "test.csv: cannot parse row %d\n", i + 1);
            return 1;
        }
    }

    knn(train, label, TRAIN_SET, test, result, TEST_SET, weight);
    for (i = 0; i < TEST_SET; i++) {
        printf("%d\n", result[i]);
    }
    return 0;
}
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
/* Number of rows read from train.csv. */
#define TRAIN_SET 1000
/* Number of rows read from test.csv. */
#define TEST_SET 1000
/* Number of feature columns in each row. */
#define FEATURE 19
/* Number of nearest neighbours that vote on each prediction. */
#define K 3
/* Training features and their labels, filled from train.csv by main(). */
int train[TRAIN_SET][FEATURE], label[TRAIN_SET];
/**
 * k-nearest-neighbour classifier using a mismatch-count (Hamming) distance.
 *
 * Reads TRAIN_SET rows (FEATURE comma-separated integers plus a label) from
 * train.csv, then reads TEST_SET rows (FEATURE comma-separated integers)
 * from test.csv. For each test row it finds the K training rows that differ
 * in the fewest columns and prints the rounded mean of their labels, one
 * per line.
 *
 * @return 0 on success, 1 if an input file cannot be opened or parsed.
 */
int main(void)
{
    int i, j, k, slot, sum, count, distance;
    int test[FEATURE], dists[K], neighbors[K];

    if (freopen("train.csv", "r", stdin) == NULL) {
        perror("train.csv");
        return 1;
    }
    for (i = 0; i < TRAIN_SET; i++) {
        for (j = 0; j < FEATURE; j++) {
            if (scanf("%d,", &train[i][j]) != 1) {
                fprintf(stderr, "train.csv: cannot parse row %d\n", i + 1);
                return 1;
            }
        }
        if (scanf("%d", &label[i]) != 1) {
            fprintf(stderr, "train.csv: cannot parse label of row %d\n", i + 1);
            return 1;
        }
    }

    if (freopen("test.csv", "r", stdin) == NULL) {
        perror("test.csv");
        return 1;
    }
    for (i = 0; i < TEST_SET; i++) {
        for (j = 0; j < FEATURE - 1; j++) {
            if (scanf("%d,", &test[j]) != 1) {
                fprintf(stderr, "test.csv: cannot parse row %d\n", i + 1);
                return 1;
            }
        }
        /* The last column has no trailing comma. */
        if (scanf("%d", &test[j]) != 1) {
            fprintf(stderr, "test.csv: cannot parse row %d\n", i + 1);
            return 1;
        }

        /* dists[] is kept sorted ascending; INT_MAX marks an empty slot. */
        for (j = 0; j < K; j++) {
            dists[j] = INT_MAX;
            neighbors[j] = 0;
        }

        for (j = 0; j < TRAIN_SET; j++) {
            /* Distance = number of columns whose values differ. */
            distance = 0;
            for (k = 0; k < FEATURE; k++) {
                if (train[j][k] != test[k]) {
                    distance += 1;
                }
            }

            /* Find the first slot holding a strictly larger distance.
             * Strict comparison keeps the earlier training row on ties. */
            for (slot = 0; slot < K && !(distance < dists[slot]); slot++) {
                ;
            }
            if (slot == K) {
                continue; /* not among the K closest seen so far */
            }

            /* Shift the tail down by one so that only the farthest
             * neighbour is dropped, then insert the new one. */
            for (k = K - 1; k > slot; k--) {
                dists[k] = dists[k - 1];
                neighbors[k] = neighbors[k - 1];
            }
            dists[slot] = distance;
            neighbors[slot] = label[j];
        }

        /* Vote: rounded mean of the labels in the filled slots. */
        sum = 0;
        count = 0;
        for (j = 0; j < K; j++) {
            if (dists[j] != INT_MAX) {
                sum += neighbors[j];
                count += 1;
            }
        }
        printf("%d\n", (count > 0) ? (int)round((double)sum / count) : 0);
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment