Skip to content

Instantly share code, notes, and snippets.

@hezila
Forked from chrisdubois/submission.py
Last active August 29, 2015 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hezila/2bc8fe8c745fd081a09c to your computer and use it in GitHub Desktop.
Save hezila/2bc8fe8c745fd081a09c to your computer and use it in GitHub Desktop.
import graphlab as gl
import math
import random
# Load the training and test tables from the local data directory.
train, test = map(gl.SFrame.read_csv, ('data/train.csv', 'data/test.csv'))

# Drop the 'id' column from the training frame only (presumably so it is not
# picked up as a feature by the classifier -- confirm against the data set).
del train['id']
def make_submission(m, test, filename):
    """Predict class probabilities for ``test`` and save them to ``filename``.

    ``m`` must provide ``predict_topk``; the top 9 class probabilities are
    requested for every row.  The long-format result (one row per
    id/class pair) is pivoted so that each id becomes a single row with one
    column per class, ordered by id, and then saved.

    The ids returned by ``predict_topk`` are shifted by one before pivoting
    (presumably to turn 0-based row numbers into the 1-based ids expected in
    the submission file -- confirm against the competition format).
    """
    long_preds = m.predict_topk(test, output_type='probability', k=9)
    long_preds['id'] = long_preds['id'].astype(int) + 1

    # Collapse the (class, probability) pairs of each id into one dict
    # column, then spread that dict into one column per class.
    stacked = long_preds.unstack(['class', 'probability'], 'probs')
    wide = stacked.unpack('probs', '')

    wide.sort('id').save(filename)
def multiclass_logloss(model, test):
    """Return the mean multi-class log loss of ``model`` on ``test``.

    ``test`` must contain a ``'target'`` column holding the true class of
    each row.  For every row the probability the model assigned to the true
    class is looked up and the negative logs of those probabilities are
    averaged.

    The probability is floored at 1e-15 before taking the log, so a true
    class predicted with probability 0 contributes a large finite penalty
    instead of making ``math.log`` raise ``ValueError``.

    NOTE(review): the true labels are attached positionally after sorting the
    predictions by id, which assumes the prediction ids follow the row order
    of ``test`` -- confirm.
    """
    min_prob = 1e-15  # floor that keeps math.log away from log(0)

    preds = model.predict_topk(test, output_type='probability', k=9)
    # Pivot from one row per (id, class) pair to one row per id with one
    # probability column per class.
    preds = preds.unstack(['class', 'probability'], 'probs').unpack('probs', '')
    preds['id'] = preds['id'].astype(int) + 1
    preds = preds.sort('id')
    preds['target'] = test['target']

    neg_log_loss = 0.0
    for row in preds:
        # The class columns are named after the labels, so the true label
        # doubles as the key of the probability to score.
        label = row['target']
        neg_log_loss -= math.log(max(row[label], min_prob))
    return neg_log_loss / preds.num_rows()
def shuffle(sf):
    """Return a copy of ``sf`` with its rows in random order.

    A temporary ``'_id'`` column of random keys is attached to ``sf``, the
    frame is sorted by that column, and the column is removed again from
    both the sorted result and the input, so the caller's frame is left
    with the columns it came in with.

    The keys come from the module-level ``random`` generator, so seeding it
    with ``random.seed`` makes the shuffle repeatable.
    """
    key = '_id'
    sf[key] = [random.random() for _ in range(sf.num_rows())]
    try:
        shuffled = sf.sort(key)
    finally:
        # Column assignment modified the caller's frame in place; undo it
        # even if sorting fails so no helper column leaks out.
        del sf[key]
    del shuffled[key]
    return shuffled
def evaluate_logloss(model, train, valid):
    """Score ``model`` on the training and validation frames.

    Returns a dict with the multi-class log loss on ``train`` under
    ``'train_logloss'`` and on ``valid`` under ``'valid_logloss'``.
    """
    train_loss = multiclass_logloss(model, train)
    valid_loss = multiclass_logloss(model, valid)
    return {'train_logloss': train_loss, 'valid_logloss': valid_loss}
# Keyword arguments passed to gl.boosted_trees_classifier.create for both
# the validation run and the final model.
params = dict(
    target='target',        # name of the label column
    max_iterations=250,
    max_depth=10,
    min_child_weight=4,
    row_subsample=.9,
    min_loss_reduction=1,
    column_subsample=.8,
    # No validation set is handed to the trainer; the hold-out split is
    # scored separately with evaluate_logloss.
    validation_set=None,
)
# Randomise the row order of the training data before splitting.
train = shuffle(train)

# Estimate performance on an internal hold-out: split the shuffled data at
# a 0.8 fraction, fit on the first part and report the log loss on both parts.
tr, va = train.random_split(0.8)
m = gl.boosted_trees_classifier.create(tr, **params)
print(evaluate_logloss(m, tr, va))

# Refit on the full training set and write the final submission file.
m = gl.boosted_trees_classifier.create(train, **params)
make_submission(m, test, 'submission.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment