Created
May 19, 2013 05:19
-
-
Save nebuta/5606775 to your computer and use it in GitHub Desktop.
Jubatus classifier test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-

# Connection settings shared by the rest of this script.
# $host / $port are handed to the Jubatus classifier client constructor;
# $name is passed as the first argument of every train/classify call
# (presumably the Jubatus instance name -- confirm against the server setup).
$host = "localhost"
$port = 9199
$name = "paper_genre"

require 'json'
require 'find'
require 'jubatus/classifier/client'
require 'jubatus/classifier/types'
# Builds one labelled training example per regular file found under +dir+.
#
# @param label [String] class label attached to every example from +dir+
# @param dir [String] directory that is walked recursively
# @return [Array<Array>] +[label, Datum]+ pairs; each Datum carries the whole
#   file text under the "abstract" string key and no numeric values
def load_labeled_abstracts(label, dir)
  Find.find(dir).select { |path| File.file?(path) }.map do |path|
    # File.read keeps the text exactly as it is stored on disk; joining the
    # lines with a single-quoted '\n' would insert a literal backslash-n.
    [label, Jubatus::Classifier::Datum.new([["abstract", File.read(path)]], [])]
  end
end

# Trains the classifier on every file under chem/ (label "chem") and
# bio/ (label "bio"), relative to the current working directory.
#
# @param client [#train] classifier client; receives +train($name, data)+
# @return [Object] whatever +client.train+ returned (also printed with +p+)
def train(client)
  train_data = load_labeled_abstracts("chem", 'chem/') +
               load_labeled_abstracts("bio", 'bio/')

  # Training data must be shuffled for online learning. shuffle! reorders the
  # array in place, so the randomised order is what actually gets sent.
  train_data.shuffle!

  result = client.train($name, train_data)
  p result
end
# Classifies four hard-coded sample abstracts and prints the outcome.
#
# First the raw classification result for the whole batch is printed with
# +p+, then one "<label>: <abstract text>" line per sample, where <label> is
# the entry with the highest score for that sample.
#
# @param client [#classify] classifier client; receives +classify($name, data)+
#   once and is expected to return one list of [label, score] entries per
#   datum, in the same order as the input
# @return [Array] the list of sample datums
def predict(client)
  data = [
    Jubatus::Classifier::Datum.new([["abstract", "Asprin was synthesized."]], []),
    Jubatus::Classifier::Datum.new([["abstract", "This DNA sequence is specific to E coli."]], []),
    Jubatus::Classifier::Datum.new([["abstract", "Toxicity of carbon nanotube is still unclear."]], []),
    Jubatus::Classifier::Datum.new([["abstract", "Proteinogenic amino acids are amino acids that are precursors to proteins."]], []),
  ]

  results = client.classify($name, data)
  p results

  # Reuse the batch result instead of sending every datum to the server a
  # second time; results[i] belongs to data[i].
  data.each_with_index do |datum, index|
    best = results[index].max_by { |estimate| estimate[1] }
    puts "#{best[0]}: #{datum.string_values[0][1]}"
  end
end
# Connect to the Jubatus classifier server configured by $host / $port.
client = Jubatus::Classifier::Client::Classifier.new($host, $port)

# Run the example: train on the files under chem/ and bio/ first, then
# classify the built-in sample abstracts with the freshly trained model.
train(client)
predict(client)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment