Skip to content

Instantly share code, notes, and snippets.

@nebuta
Created May 19, 2013 07:59
Show Gist options
  • Save nebuta/5607014 to your computer and use it in GitHub Desktop.
Save nebuta/5607014 to your computer and use it in GitHub Desktop.
Tweet classification test
# -*- coding: utf-8 -*-
# Connection settings shared by the rest of the script:
# server host, server port, and the name passed to every train/classify call.
$host, $port, $name = 'localhost', 9199, 'twitter_user'
require 'json'
require 'find'
require 'jubatus/classifier/client'
require 'jubatus/classifier/types'
# Trains the classifier on the sample tweets stored under each author's
# directory.
#
# Every ".txt" file found (recursively) under "tweet_kenichiromogi" and
# "tweet_t_ishin" is split into tweets; each tweet becomes one training
# sample labelled with the name of the directory it was found under.
#
# client - object responding to train(name, data); it is called once with
#          $name and the array of [label, datum] pairs.
# shown  - when truthy, progress (directory names, file names and the value
#          returned by client.train) is printed to stdout.
#
# Returns nil.
def train(client, shown)
  dirs = ["tweet_kenichiromogi", "tweet_t_ishin"]
  train_data = []
  dirs.each do |dir|
    puts "Samples: " + dir if shown
    Find.find(dir) do |path|
      next if File.directory?(path)
      next if File.extname(path) != ".txt"
      print File.basename(path, ".txt") + " " if shown
      # readlines keeps each line terminator, so joining with "\n" doubles
      # every newline before the text is cut on runs of three newlines.
      tweets = IO.readlines(path).join("\n").split("\n\n\n")
      tweets.each do |tweet|
        train_data << [dir, Jubatus::Classifier::Datum.new([["tweet", tweet]], [])]
      end
    end
    puts if shown
  end
  # Training data must be shuffled for online learning. The former
  # `sort_by { rand }` built a shuffled copy and discarded it, leaving the
  # samples grouped by author; shuffle! reorders the array in place.
  train_data.shuffle!
  r = client.train($name, train_data)
  puts(r.to_s + " samples.") if shown
end
# Classifies every tweet found under the "tweet_unknown" directory and prints
# the predicted author of each one.
#
# Each ".txt" file found (recursively) is split into tweets; blank tweets are
# dropped. All tweets are sent to the classifier in a single request. The
# predicted author is printed once per tweet as results are processed, then
# every tweet is printed again, grouped by author, followed by "--->author".
#
# client - object responding to classify(name, data); it is called once with
#          $name and the array of datums. It is expected to return one list
#          of estimates per datum, in the same order, where each estimate is
#          indexed as [label, score] (this is how the results are read here).
#
# Returns the array of [tweet_text, author] pairs sorted by author.
def predict(client)
  data = []
  Find.find("tweet_unknown") do |path|
    next if File.directory?(path)
    next if File.extname(path) != ".txt"
    source = File.basename(path, ".txt")
    # readlines keeps each line terminator, so joining with "\n" doubles
    # every newline before the text is cut on runs of three newlines.
    tweets = IO.readlines(path).join("\n").split("\n\n\n").map(&:chomp).reject(&:empty?)
    tweets.each do |tweet|
      data << Jubatus::Classifier::Datum.new([["tweet", tweet], ["filename", source]], [])
    end
  end

  # A single request classifies the whole batch. Previously the batch result
  # was discarded and every datum was classified again with its own request.
  estimates = client.classify($name, data)

  result = data.zip(estimates).map do |datum, scores|
    best = scores.max_by { |entry| entry[1] }
    # Labels are the training directory names (e.g. "tweet_t_ishin"); drop the
    # first "_"-separated component to keep only the author part.
    author = best[0].split('_').drop(1).join('_')
    puts author
    [datum.string_values[0][1], author]
  end

  result.sort_by { |pair| pair[1] }.each do |tweet, author|
    puts tweet
    puts "--->" + author
    puts
  end
end
# Open a connection to the Jubatus classifier server.
client = Jubatus::Classifier::Client::Classifier.new($host, $port)

# Run the example: feed the training set twice (first pass with progress
# output, second pass silent), pause for a second, then classify the
# unknown tweets.
[true, false].each { |verbose| train(client, verbose) }
sleep 1
predict(client)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment