Skip to content

Instantly share code, notes, and snippets.

@arnab
Created January 2, 2014 09:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arnab/8217009 to your computer and use it in GitHub Desktop.
Save arnab/8217009 to your computer and use it in GitHub Desktop.
require "java"
require "./lib/weka.jar"
# Loads the file at +filename+ through Weka's ConverterUtils::DataSource
# and returns whatever its get_data_set call yields.
def read_data(filename)
  source = Java::WekaCoreConverters::ConverterUtils::DataSource.new(filename)
  source.get_data_set
end
# Runs +data+ through a freshly created Weka Normalize filter and returns
# the filtered data set.
#
# The first instance is printed (as a double array) before and after
# filtering as a quick visual check. A new filter is created and given
# +data+ as its input format on every call, so nothing is shared between
# calls.
def normalize(data)
  show_first = ->(set) { puts set.first.to_double_array.inspect }

  show_first.call(data)
  scaler = Java::WekaFiltersUnsupervisedAttribute::Normalize.new.tap do |filter|
    filter.set_input_format(data)
  end
  scaled = Java::WekaFilters::Filter.use_filter(data, scaler)
  show_first.call(scaled)
  scaled
end
# Builds a Weka LibSVM classifier on +data+ and returns it.
#
# algo_type - accepted so existing call sites keep working, but not
#             consulted: LibSVM is always used.
# data      - training set; asked for #size (for the log line) and handed
#             to build_classifier.
#
# Returns the LibSVM object after build_classifier has run.
def train(algo_type, data)
  algo = Java::WekaClassifiersFunctions::LibSVM.new

  # Options that carry a value, as [flag, value] pairs.
  valued_options = [
    ['S', 0],     # svm type: C-SVC
    ['K', 2],     # kernel type: 2 is radial basis function in LibSVM (0 is linear)
    ['D', 0],     # degree
    ['G', 0],     # gamma
    ['R', 0],     # coef0
    ['C', 10],    # cost C
    ['N', 0],     # nu
    ['E', 0.001], # tolerance: eps
    ['M', 1000]   # cache memory in MB (about 1GB)
  ]
  # Options that are bare switches in the Weka LibSVM wrapper and take no
  # value. -H turns the shrinking heuristic off. -Z is deliberately absent:
  # its mere presence would turn input normalization ON.
  switches = ['H']

  # Weka's option parser matches whole array elements, so the flag and its
  # value must be separate tokens ("-S", "0"). A fused "-S 0" string is never
  # recognised and the setting is silently dropped.
  options = valued_options.flat_map { |flag, value| ["-#{flag}", value.to_s] }
  options.concat(switches.map { |flag| "-#{flag}" })

  algo.set_options(options)
  puts "building model with #{algo.class} on #{data.size} data points, with options: #{options.join(' ')}"
  algo.build_classifier(data)
  algo
end
# Classifies every instance in +data+ with +model+ and prints the accuracy.
#
# model - object responding to classify_instance(instance).
# data  - collection responding to size and each_with_index, yielding
#         instances that respond to class_value.
#
# Prints a running accuracy at every 1000th index (including index 0) and a
# final figure. Returns nil (the result of the last puts).
#
# The prediction is compared against the instance's stored class value and
# is never written into the instance, so +data+ keeps its true labels and can
# be evaluated again afterwards with the same result.
def test(model, data)
  total = data.size
  misses = 0
  puts "Testing #{total} data points"

  data.each_with_index do |instance, i|
    predicted = model.classify_instance(instance)
    misses += 1 unless predicted == instance.class_value
    next unless (i % 1000).zero?

    puts "Accuracy at #{i}/#{total}: #{100 * (1 - misses.to_f / (i + 1))} %"
  end

  puts "Accuracy: #{100 * (1 - misses.to_f / total)} %"
end
training_data = read_data("data/img_train_data.csv")
test_data = read_data("data/img_test_data.csv")

# The label to classify is the last attribute of each data set.
[training_data, test_data].each do |dataset|
  dataset.set_class_index(dataset.num_attributes - 1)
end

# Scale both sets with ONE Normalize filter whose input format is the training
# data. Running the training set through it first fixes the scaling ranges;
# the test set is then mapped with those same ranges. Fitting a separate
# filter per set (which is what calling normalize on each set does) would
# scale the test features by the test set's own min/max, so the model would
# see features on a different scale than it was trained on.
normalizer = Java::WekaFiltersUnsupervisedAttribute::Normalize.new
normalizer.set_input_format(training_data)
training_data = Java::WekaFilters::Filter.use_filter(training_data, normalizer)
test_data = Java::WekaFilters::Filter.use_filter(test_data, normalizer)

model = train(:svm, training_data)
test(model, test_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment