Skip to content

Instantly share code, notes, and snippets.

@bmurzeau
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bmurzeau/04a057647330aba14224 to your computer and use it in GitHub Desktop.
Save bmurzeau/04a057647330aba14224 to your computer and use it in GitHub Desktop.
PredicSis API Script for both Kaggle Give Me Some Credit challenge and KDD Cup 2008 Breast Cancer (PredicSis API vs Google Prediction)
require 'rubygems'
require 'bundler/setup'
require 'benchmark'
require 'predicsis_ml_sdk'
# SDK-wide configuration: send the SDK's log output to standard output and
# keep only messages at WARN severity or above.
PredicsisMlSdk.configure do |sdk_config|
  sdk_config.logger = Logger.new(STDOUT)
  # The level is set through the configured logger, after it has been assigned.
  sdk_config.logger.level = Logger::WARN
end
# Preconditions:

# Token handed to every PredicsisMlSdk `create` call. Taken from the
# PREDICSIS_TOKEN environment variable when it is set, so the secret does not
# have to be written into this file; otherwise the placeholder below is used.
# Please visit https://developer.predicsis.com to get a token.
TOKEN = ENV.fetch('PREDICSIS_TOKEN', 'MY_TOKEN_HERE')

# Passed as the `separator:` option when creating dictionaries and datasets.
SEPARATOR = ','
# Passed as the `header:` option; presumably the demo files have no header
# row -- confirm against the data files.
HEADER = false
# Name of the dictionary variable that the workflow turns into a categorical
# variable and uses for the preparation rules and the modalities set.
TARGET_VARIABLE = 'Var1'
# Passed as `main_modality:` when the scoring dataset is created.
MAIN_MODALITY = 'True'

# Bucket keys of the training and scoring files, chosen by the first
# command-line argument ('kdd' or 'kaggle').
TRAIN_KEY, TEST_KEY =
  case ARGV[0]
  when 'kdd'
    ['blog/kdd_2008_breast_cancer_train.txt',
     'blog/kdd_2008_breast_cancer_score.txt']
  when 'kaggle'
    ['blog/kaggle_credit_scoring_train.txt',
     'blog/kaggle_credit_scoring_score.txt']
  else
    # `abort` writes the message to standard error (where the rest of the
    # script reports) and terminates the process with exit status 1.
    abort "Invalid argument: expected 'kdd' or 'kaggle'"
  end
# Runs the demo workflow against the PredicSis API: uploads the training and
# scoring data, builds a classifier on the training data, then scores the
# scoring data. Intermediate API resources are kept in class-level instance
# variables so that later steps can refer to the ids of earlier ones.
class Workflow
  # Start workflow.
  #
  # Creates the datasets, builds the model and generates the predictions,
  # reporting progress and wall-clock timings on $stderr. Errors raised by any
  # step propagate unchanged to the caller.
  #
  # @raise [RuntimeError] if the dictionary has no variable named TARGET_VARIABLE
  # @return [nil]
  def self.perform
    $stderr.puts 'Initialize data'
    init
    global_time = Benchmark.measure do
      timed_step('Building model') { build_model }
      timed_step('Generating predictions') { generate_predictions }
    end
    $stderr.puts "Predicsis finished in: #{global_time.real} seconds"
  end

  # Creates the learning and scoring sources and their datasets, waiting for
  # each dataset to be ready before returning.
  def self.init
    # Create a learning source
    learning_source_params = { name: 'My Kdd Train Source',
                               bucket: 'predicsis-data-demos',
                               key: TRAIN_KEY }
    @learning_source = PredicsisMlSdk::Source.create(learning_source_params, TOKEN)

    # Create a learning dataset
    learning_dataset_params = { source_ids: [@learning_source.id], header: HEADER,
                                name: 'My Kdd Train Dataset', separator: SEPARATOR }
    @learning_dataset = PredicsisMlSdk::Dataset.create(learning_dataset_params, TOKEN)
    @learning_dataset.wait_for_result

    # Create a scoring source
    scoring_source_params = { name: 'My Kdd Test Source',
                              bucket: 'predicsis-data-demos',
                              key: TEST_KEY }
    @scoring_source = PredicsisMlSdk::Source.create(scoring_source_params, TOKEN)

    # Create a scoring dataset
    scoring_dataset_params = { source_ids: [@scoring_source.id], header: HEADER,
                               separator: SEPARATOR, name: 'My Kdd Test Dataset' }
    @scoring_dataset = PredicsisMlSdk::Dataset.create(scoring_dataset_params, TOKEN)
    @scoring_dataset.wait_for_result
  end

  # Announces +label+ on $stderr, runs the block, then reports how long the
  # block took (wall-clock seconds).
  def self.timed_step(label)
    elapsed = Benchmark.measure do
      $stderr.puts label
      yield
    end
    $stderr.puts "completed in: #{elapsed.real} seconds"
  end

  # Builds the classifier: dictionary -> target variable -> preparation rules
  # -> model. Each resource is awaited before the next one is created.
  def self.build_model
    # Create a dictionary
    dictionary_params = { name: 'My Kdd Dictionary', dataset_id: @learning_dataset.id,
                          header: HEADER, separator: SEPARATOR }
    @dictionary = PredicsisMlSdk::Dictionary.create(dictionary_params, TOKEN)
    @dictionary.wait_for_result

    # Select and update the target variable to ensure that it is categorical
    @variable = categorical_target_variable

    # Create preparation rules
    preparation_rules_params = { name: 'My Kdd Preparation Rules Set',
                                 variable_id: @variable.id,
                                 dataset_id: @learning_dataset.id }
    @preparation_rules = PredicsisMlSdk::PreparationRulesSet.create(
      preparation_rules_params, TOKEN
    )
    @preparation_rules.wait_for_result

    # Create the model
    classifier_params = { type: 'classifier', title: 'My Classifier',
                          preparation_rules_set_id: @preparation_rules.id }
    @classifier = PredicsisMlSdk::Model.create(classifier_params, TOKEN)
    @classifier.wait_for_result
  end

  # Looks up the dictionary variable named TARGET_VARIABLE and marks it as
  # categorical.
  #
  # The lookup is done with `find` and does not depend on what `update`
  # returns, so a falsy return value from `update` can no longer make the
  # target silently disappear.
  #
  # @raise [RuntimeError] if no variable has that name
  # @return [Object] the matching variable
  def self.categorical_target_variable
    target = @dictionary.variables.find { |variable| variable.name == TARGET_VARIABLE }
    raise "Target variable '#{TARGET_VARIABLE}' not found in dictionary" unless target

    target.update(type: 'categorical')
    target
  end

  # Scores the scoring dataset with the classifier built by build_model.
  def self.generate_predictions
    # Create a modalities set to get all available modalities for the target variable
    modalities_set_params = { name: 'My Kdd Modalities Set',
                              variable_id: @variable.id,
                              dataset_id: @learning_dataset.id }
    @modalities_set = PredicsisMlSdk::ModalitiesSet.create(modalities_set_params, TOKEN)
    @modalities_set.wait_for_result

    # Apply a score
    scoreset_params = { name: 'My Kdd Scoreset', classifier_id: @classifier.id,
                        separator: SEPARATOR, header: HEADER,
                        dataset_id: @scoring_dataset.id,
                        modalities_set_id: @modalities_set.id,
                        main_modality: MAIN_MODALITY,
                        data_file: { filename: 'output.txt' } }
    @scoreset = PredicsisMlSdk::Dataset.create(scoreset_params, TOKEN)
    @scoreset.wait_for_result
  end

  private_class_method :timed_step, :build_model,
                       :categorical_target_variable, :generate_predictions
end
# Script entry point: only runs when this file is executed directly, not when
# it is loaded from another file. Any StandardError raised by the workflow is
# logged through the SDK logger and the process exits with status 1.
if $PROGRAM_NAME == __FILE__
  begin
    Workflow.perform
  rescue StandardError => failure
    summary = failure.message
    # Some errors expose an extra `error` payload; it is appended when present
    # and left blank otherwise.
    detail = failure.respond_to?(:error) ? failure.error : nil
    PredicsisMlSdk.logger.error "#{summary}: #{detail}"
    exit 1
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment