Skip to content

Instantly share code, notes, and snippets.

@louisdorard
Forked from bmurzeau/gist:04a057647330aba14224
Last active August 29, 2015 14:20
Show Gist options
  • Save louisdorard/06d2d297523dabb2048d to your computer and use it in GitHub Desktop.
Save louisdorard/06d2d297523dabb2048d to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'bundler/setup'
require 'benchmark'
require 'predicsis_ml_sdk'
# Route SDK log output to standard output and keep only warnings and above.
PredicsisMlSdk.configure do |sdk_config|
  sdk_config.logger = Logger.new(STDOUT).tap { |log| log.level = Logger::WARN }
end
# Preconditions:
# API token handed to every SDK call.
TOKEN = 'MY_TOKEN_HERE' # please visit https://developer.predicsis.com
# Passed as the `separator:` / `header:` options when datasets and the dictionary are created.
SEPARATOR = ','
HEADER = false
# Name of the dictionary variable that is forced to 'categorical' and used as the target.
TARGET_VARIABLE = 'Var1'
# Passed as `main_modality:` when the score set is created.
MAIN_MODALITY = 'True'

# The first command-line argument selects which demo train/score file pair is used.
TRAIN_KEY, TEST_KEY =
  case ARGV[0]
  when 'kdd'
    ['blog/kdd_2008_breast_cancer_train.txt',
     'blog/kdd_2008_breast_cancer_score.txt']
  when 'kaggle'
    ['blog/kaggle_credit_scoring_train.txt',
     'blog/kaggle_credit_scoring_score.txt']
  else
    # A usage error is a diagnostic: abort writes it to stderr (like the rest
    # of this script's progress output) and exits with status 1.
    abort "Invalid argument: expected 'kdd' or 'kaggle'"
  end
# Drives the demo end to end through the PredicSis SDK: registers the train and
# score files as sources/datasets, builds a classifier on the training dataset
# and scores the test dataset with it.
#
# Everything is exposed as class methods; intermediate SDK resources are kept
# in class-level instance variables so the individual steps can share them.
class Workflow
  # Start workflow.
  #
  # Progress messages and timings are written to $stderr. Errors raised while
  # talking to the SDK are not handled here and propagate to the caller.
  def self.perform
    $stderr.puts 'Initialize data'
    init
    global_time = Benchmark.measure do
      timed_step('Building model') { build_model }
      timed_step('Generating predictions') { generate_predictions }
    end
    $stderr.puts "Predicsis finished in: #{global_time.real} seconds"
  end

  # Creates the learning and scoring sources and datasets from TRAIN_KEY and
  # TEST_KEY in the 'predicsis-data-demos' bucket.
  # NOTE(review): wait_for_result presumably blocks until the server-side job
  # has finished - confirm against the SDK documentation.
  def self.init
    # Create a learning source
    learning_source_params = { name: 'My Kdd Train Source',
                               bucket: 'predicsis-data-demos',
                               key: TRAIN_KEY }
    @learning_source = PredicsisMlSdk::Source.create(learning_source_params, TOKEN)
    # Create a learning dataset
    learning_dataset_params = { source_ids: [@learning_source.id], header: HEADER,
                                name: 'My Kdd Train Dataset', separator: SEPARATOR }
    @learning_dataset = PredicsisMlSdk::Dataset.create(learning_dataset_params, TOKEN)
    @learning_dataset.wait_for_result
    # Create a scoring source
    scoring_source_params = { name: 'My Kdd Test Source',
                              bucket: 'predicsis-data-demos',
                              key: TEST_KEY }
    @scoring_source = PredicsisMlSdk::Source.create(scoring_source_params, TOKEN)
    # Create a scoring dataset
    scoring_dataset_params = { source_ids: [@scoring_source.id], header: HEADER,
                               separator: SEPARATOR, name: 'My Kdd Test Dataset' }
    @scoring_dataset = PredicsisMlSdk::Dataset.create(scoring_dataset_params, TOKEN)
    @scoring_dataset.wait_for_result
  end

  # Prints +label+, runs the given block, then prints how long it took
  # (wall-clock seconds), all on $stderr.
  def self.timed_step(label)
    time = Benchmark.measure do
      $stderr.puts label
      yield
    end
    $stderr.puts "completed in: #{time.real} seconds"
  end

  # Creates the dictionary, the preparation rules set and finally the
  # classifier for the learning dataset.
  def self.build_model
    # Create a dictionary
    dictionary_params = { name: 'My Kdd Dictionary', dataset_id: @learning_dataset.id,
                          header: HEADER, separator: SEPARATOR }
    @dictionary = PredicsisMlSdk::Dictionary.create(dictionary_params, TOKEN)
    @dictionary.wait_for_result
    # Select and update target variable to ensure that it is categorical
    @variable = find_target_variable
    # Create preparation rules
    preparation_rules_params = { name: 'My Kdd Preparation Rules Set',
                                 variable_id: @variable.id,
                                 dataset_id: @learning_dataset.id }
    @preparation_rules = PredicsisMlSdk::PreparationRulesSet.create(
      preparation_rules_params, TOKEN
    )
    @preparation_rules.wait_for_result
    # Create the model
    classifier_params = { type: 'classifier', title: 'My Classifier',
                          preparation_rules_set_id: @preparation_rules.id }
    @classifier = PredicsisMlSdk::Model.create(classifier_params, TOKEN)
    @classifier.wait_for_result
  end

  # Looks up the dictionary variable named TARGET_VARIABLE, switches its type
  # to 'categorical' and returns it.
  #
  # The lookup deliberately does not depend on what +update+ returns, and a
  # missing variable is reported by name instead of surfacing later as a
  # NoMethodError on nil.
  #
  # Raises ArgumentError when the dictionary has no such variable.
  def self.find_target_variable
    variable = @dictionary.variables.find { |v| v.name == TARGET_VARIABLE }
    unless variable
      raise ArgumentError, "Target variable '#{TARGET_VARIABLE}' not found in dictionary"
    end

    variable.update(type: 'categorical')
    variable
  end

  # Creates the modalities set for the target variable, then the score set for
  # the scoring dataset using the classifier built earlier.
  def self.generate_predictions
    # Create a modalities set to get all available modalities for the target variable
    modalities_set_params = { name: 'My Kdd Modalities Set',
                              variable_id: @variable.id,
                              dataset_id: @learning_dataset.id }
    @modalities_set = PredicsisMlSdk::ModalitiesSet.create(modalities_set_params, TOKEN)
    @modalities_set.wait_for_result
    # Apply a score
    scoreset_params = { name: 'My Kdd Scoreset', classifier_id: @classifier.id,
                        separator: SEPARATOR, header: HEADER,
                        dataset_id: @scoring_dataset.id,
                        modalities_set_id: @modalities_set.id,
                        main_modality: MAIN_MODALITY,
                        data_file: { filename: 'output.txt' } }
    @scoreset = PredicsisMlSdk::Dataset.create(scoreset_params, TOKEN)
    @scoreset.wait_for_result
  end

  private_class_method :timed_step, :build_model, :find_target_variable,
                       :generate_predictions
end
# Run the workflow only when this file is executed directly, not when it is required.
if __FILE__ == $PROGRAM_NAME
  begin
    Workflow.perform
  rescue StandardError => failure
    # Some errors expose extra detail through #error; include it when available.
    detail = failure.respond_to?(:error) ? failure.error : nil
    PredicsisMlSdk.logger.error "#{failure.message}: #{detail}"
    exit 1
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment