Skip to content

Instantly share code, notes, and snippets.

@nebuta
Created May 19, 2013 05:19
Show Gist options
  • Save nebuta/5606775 to your computer and use it in GitHub Desktop.
Save nebuta/5606775 to your computer and use it in GitHub Desktop.
Jubatus classifier test
# -*- coding: utf-8 -*-
# Host and port handed to the Jubatus classifier client constructor below.
$host = "localhost"
$port = 9199
# Name passed as the first argument to every train/classify request in this script.
$name = "paper_genre"
require 'json'
require 'find'
require 'jubatus/classifier/client'
require 'jubatus/classifier/types'
# Builds one [label, Datum] training example per regular file found
# (recursively) under +dir+, using the file's full text as the "abstract" value.
#
# label - String class label attached to every example from this directory.
# dir   - String path of the directory to walk.
#
# Returns an Array of [label, Jubatus::Classifier::Datum] pairs.
# Raises Errno::ENOENT (from Find.find) if +dir+ does not exist.
def load_labeled_abstracts(label, dir)
  Find.find(dir).select { |path| File.file?(path) }.map do |path|
    # File.read keeps the file's own line breaks; the previous
    # readlines.join('\n') injected a literal backslash-n between lines.
    [label, Jubatus::Classifier::Datum.new([["abstract", File.read(path)]], [])]
  end
end

# Trains the classifier on every file under chem/ (label "chem") and
# bio/ (label "bio"), relative to the current working directory.
#
# client - object responding to train(name, data); the model name is read
#          from the global $name.
#
# Prints the value returned by client.train and returns it.
def train(client)
  train_data = load_labeled_abstracts("chem", 'chem/') +
               load_labeled_abstracts("bio", 'bio/')

  # Training data must be shuffled for online learning. The result has to be
  # assigned: a bare sort_by{rand} returns a new array and leaves the
  # original in chem-then-bio order.
  train_data = train_data.shuffle

  r = client.train($name, train_data)
  p r
end
# Classifies a fixed set of sample abstracts and prints the outcome.
#
# client - object responding to classify(name, data); the model name is read
#          from the global $name.
#
# First sends all samples in one request and dumps the raw response with p,
# then classifies each sample on its own and prints "<label>: <text>".
# Returns the Array of sample Datum objects.
def predict(client)
  sample_texts = [
    "Asprin was synthesized.",
    "This DNA sequence is specific to E coli.",
    "Toxicity of carbon nanotube is still unclear.",
    "Proteinogenic amino acids are amino acids that are precursors to proteins.",
  ]
  data = sample_texts.map do |text|
    Jubatus::Classifier::Datum.new([["abstract", text]], [])
  end

  # Batch request: show the raw response for every sample at once.
  p client.classify($name, data)

  data.each do |datum|
    candidates = client.classify($name, [datum])[0]
    # Entries are indexed as [0] and [1] here; presumably [label, score] --
    # confirm against the installed Jubatus client version.
    winner = candidates.max { |a, b| a[1] <=> b[1] }
    puts winner[0] + ": " + datum.string_values[0][1]
  end
end
# Create a classifier client for the Jubatus server at $host:$port.
client = Jubatus::Classifier::Client::Classifier.new($host, $port)
# Run the example: train first so the model has seen the labeled files,
# then classify the built-in sample abstracts.
train(client)
predict(client)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment