Skip to content

Instantly share code, notes, and snippets.

@y-tag
Created March 23, 2013 22:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save y-tag/5229594 to your computer and use it in GitHub Desktop.
Save y-tag/5229594 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cfloat>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include <jubatus/client.hpp>
// g++ -O2 -o eval_svmdata2 eval_svmdata2.cpp `pkg-config pficommon --libs --cflags` -lmsgpack -ljubatus_mpio -ljubatus_msgpack-rpc
using jubatus::classifier::datum;
using jubatus::classifier::estimate_result;
// Parses one line of LIBSVM-style text of the form
//
//     <label> <key>:<value> <key>:<value> ...
//
// where fields are separated by spaces and/or tabs.
//
// Parameters:
//   line  - the text to parse (without the trailing newline).
//   label - receives the first whitespace-delimited token; cleared when the
//           line contains no token at all.
//   d     - receives one (key, value) entry in d->num_values per well-formed
//           "<key>:<value>" token; both d->string_values and d->num_values
//           are cleared first.
//
// Returns 1 when a label was found, 0 when the line is empty or consists only
// of blanks (in which case *label and *d are left empty).
//
// Tokens that contain no ':' or that have an empty key are skipped. Values are
// converted with strtod, so a non-numeric value yields 0.0.
int parse_line(const std::string &line, std::string *label, datum *d) {
  static const char kBlanks[] = " \t";
  const std::string::size_type npos = std::string::npos;

  d->string_values.clear();
  d->num_values.clear();
  label->clear();

  // The label is the first blank-delimited token. A line without any token
  // must be rejected here: there is nothing to assign to *label.
  std::string::size_type begin = line.find_first_not_of(kBlanks);
  if (begin == npos) {
    return 0;
  }
  std::string::size_type end = line.find_first_of(kBlanks, begin);
  if (end == npos) {
    end = line.size();
  }
  label->assign(line, begin, end - begin);

  // Every remaining blank-delimited token is expected to be "<key>:<value>".
  // Splitting on blanks first keeps stray whitespace out of the keys.
  while (true) {
    begin = line.find_first_not_of(kBlanks, end);
    if (begin == npos) {
      break;
    }
    end = line.find_first_of(kBlanks, begin);
    if (end == npos) {
      end = line.size();
    }

    const std::string token(line, begin, end - begin);
    const std::string::size_type colon = token.find(':');
    if (colon == npos || colon == 0) {
      continue;  // Not a key:value pair; ignore it rather than mis-pair fields.
    }

    const double value = std::strtod(token.c_str() + colon + 1, NULL);
    d->num_values.push_back(std::make_pair(token.substr(0, colon), value));
  }
  return 1;
}
int main(int argc, char **argv) {
std::string host = "127.0.0.1";
int port = 9199;
std::string name = "test";
jubatus::classifier::client::classifier client(host, port, 10.0);
client.clear(name);
if (argc < 3) {
fprintf(stderr, "%s train_f test_f\n", argv[0]);
exit(1);
}
const char *train_f = argv[1];
const char *test_f = argv[2];
int num_try = 20;
srand(1000);
std::string buff;
std::vector<std::pair<std::string, datum> > train_data;
std::vector<std::pair<std::string, datum> > test_data;
std::ifstream trfs;
std::ifstream tefs;
trfs.open(train_f);
while (getline(trfs, buff)) {
std::string label;
datum d;
parse_line(buff, &label, &d);
train_data.push_back(std::make_pair(label, d));
}
tefs.open(test_f);
while (getline(tefs, buff)) {
std::string label;
datum d;
parse_line(buff, &label, &d);
test_data.push_back(std::make_pair(label, d));
}
for (int i = 0; i < num_try; ++i) {
fprintf(stderr, "%02d\n", i);
client.clear(name);
std::random_shuffle(train_data.begin(), train_data.end());
std::vector<std::pair<std::string, datum> > tr_data;
std::vector<datum> te_data;
int train_mistakes = 0;
for (size_t j = 0; j < train_data.size(); ++j) {
te_data.clear();
te_data.push_back(train_data[j].second);
std::vector<std::vector<estimate_result> > results = client.classify(name, te_data);
std::string predicted_label = "";
double max_score = -DBL_MAX;
for (size_t i = 0; i < results[0].size(); ++i) {
const estimate_result& r = results[0][i];
if (r.score > max_score) {
max_score = r.score;
predicted_label = r.label;
}
}
if (predicted_label != train_data[j].first) {
train_mistakes += 1;
}
tr_data.clear();
tr_data.push_back(train_data[j]);
client.train(name, tr_data);
}
int test_mistakes = 0;
for (size_t j = 0; j < test_data.size(); ++j) {
te_data.clear();
te_data.push_back(test_data[j].second);
std::vector<std::vector<estimate_result> > results = client.classify(name, te_data);
std::string predicted_label = "";
double max_score = -DBL_MAX;
for (size_t i = 0; i < results[0].size(); ++i) {
const estimate_result& r = results[0][i];
if (r.score > max_score) {
max_score = r.score;
predicted_label = r.label;
}
}
if (predicted_label != test_data[j].first) {
test_mistakes += 1;
}
}
fprintf(stdout, "%d\t%d\t%f\t%f\n", train_mistakes, test_mistakes, static_cast<double>(train_mistakes) / train_data.size(), static_cast<double>(test_mistakes) / test_data.size());
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment