Skip to content

Instantly share code, notes, and snippets.

@ololobus
Last active October 7, 2015 14:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ololobus/116d36585a4fc08b3a79 to your computer and use it in GitHub Desktop.
Save ololobus/116d36585a4fc08b3a79 to your computer and use it in GitHub Desktop.
Scoring scripts for Data Science Week 2015 Datathon in Moscow
#!/usr/bin/env python
import sys
import json
# Sanity check for a team's Ozon submission: reports whether any line of the
# team's file carries more recommendations than the allowed maximum.

MAX_RECOMS = 1000  # a line with more recommendations than this is flagged


def has_oversized_recoms(lines, limit=MAX_RECOMS):
    """Return True if any JSON line holds more than `limit` recommendations.

    lines -- iterable of strings; each is parsed with json.loads and must be
             an object with a 'recoms' member whose length is checked.
    limit -- largest number of recommendations one line may carry.

    Reading stops at the first offending line. Malformed lines are not
    tolerated: errors from json.loads or the 'recoms' lookup propagate.
    """
    return any(len(json.loads(line)['recoms']) > limit for line in lines)


def main(argv):
    """Check the submission of the team named in argv[1] and print a verdict."""
    if len(argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('Usage: %s TEAM' % argv[0])
    team_path = '/users/team%s/ozon_recoms.txt' % argv[1]
    with open(team_path) as f:
        badass_detected = has_oversized_recoms(f)
    if badass_detected:
        print('Badass detected!')
    else:
        print('OK')


if __name__ == '__main__':
    main(sys.argv)
#!/usr/bin/env python
import sys
import json
import math
# NDCG scorer: compares a team's recommendation file against the ground
# truth and prints a result dict with the mean NDCG over all lines.

true_path = 'true_recoms.txt'
team_path = 'recoms.txt'


def discounted_gain(relevance, rank):
    """Return the DCG contribution of `relevance` at 0-based position `rank`.

    The first position is not discounted; every later position is divided
    by log2(rank + 1), so the second position is divided by exactly 1.
    The relevance is converted to float so integer and numeric-string
    inputs behave the same and never trigger integer division.
    """
    relevance = float(relevance)
    if rank == 0:
        return relevance
    return relevance / math.log(rank + 1, 2)


def compute_ndcg(true_recoms, team_recoms):
    """Return DCG(team ranking) / DCG(ideal ranking) for one item.

    true_recoms -- dict mapping recommended id -> true relevance.
    team_recoms -- dict mapping recommended id -> team score; the ranking is
                   obtained by sorting on that score, highest first.

    Team entries absent from `true_recoms` contribute nothing but still
    occupy a position. Returns 0.0 when the ideal DCG is zero (for example
    an empty ground truth) instead of dividing by zero.
    """
    ideal = sorted((float(rel) for rel in true_recoms.values()), reverse=True)
    idcg = sum(discounted_gain(rel, rank) for rank, rel in enumerate(ideal))
    if not idcg:
        return 0.0

    ranked = sorted(team_recoms.items(), key=lambda kv: float(kv[1]),
                    reverse=True)
    dcg = sum(discounted_gain(true_recoms[rec_id], rank)
              for rank, (rec_id, _) in enumerate(ranked)
              if rec_id in true_recoms)
    return dcg / idcg


def score_recommendations(true_lines, team_lines):
    """Score a submission and return {'score': ..., 'details': ...}.

    true_lines -- iterable of JSON strings with 'item' and 'true_recoms'.
    team_lines -- iterable of JSON strings with 'item' and 'recoms'.

    The score is the mean NDCG over the team lines. It stays 0 when the
    team references an item missing from the ground truth, when the number
    of team lines differs from the number of ground-truth items, or when
    there is nothing to score.
    """
    result = {'score': 0, 'details': 'failed'}

    true_recs = {}
    for line in true_lines:
        data = json.loads(line)
        true_recs[data['item']] = data['true_recoms']

    ndcg_sum = 0.0
    lines_number = 0
    # NOTE(review): repeated items in the team file are each counted as a
    # line; confirm whether duplicates should be rejected.
    for line in team_lines:
        data = json.loads(line)
        lines_number += 1
        item = data['item']
        if item not in true_recs:
            # Report the problem instead of crashing with a KeyError.
            result['details'] = 'Unknown item: %r' % (item,)
            return result
        ndcg_sum += compute_ndcg(true_recs[item], data['recoms'])

    if lines_number != len(true_recs):
        result['details'] = 'Wrong lines number'
    elif not lines_number:
        # Both inputs are empty: there is no mean to compute.
        result['details'] = 'No lines to score'
    else:
        result['details'] = 'ok'
        result['score'] = ndcg_sum / lines_number
    return result


def main():
    """Score the files named by `true_path` / `team_path` and print the result."""
    with open(true_path) as true_file, open(team_path) as team_file:
        result = score_recommendations(true_file, team_file)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(repr(result))


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Salary scorer: compares predicted salary bounds with the true ones.

Both input files hold one JSON object per line with an 'id' and a 'salary'
object that has 'to' and 'from' members; the files are read in lockstep.
"""
import sys
import json
import operator
import math
try:
    from itertools import izip  # Python 2: lazy pairwise iteration
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy

true_path = 'true_hh_salaries.txt'
team_path = 'hh_salaries.txt'

BOUNDS = ('to', 'from')
WORST_ERROR = 1.0  # error charged to a bound that has no scored predictions


def relative_squared_error(true_value, team_value):
    """Return (min / max - 1) ** 2 for the two values, taken as floats."""
    true_value = float(true_value)
    team_value = float(team_value)
    ratio = min(true_value, team_value) / max(true_value, team_value)
    return (ratio - 1) ** 2


def _fraction(part, total):
    """Return part / total as a float, or 0.0 when total is zero."""
    if not total:
        return 0.0
    return part / float(total)


def _root_mean_error(error_sum, count):
    """Return sqrt(error_sum / count), or WORST_ERROR when count is zero."""
    if not count:
        return WORST_ERROR
    return math.sqrt(error_sum / count)


def score_salaries(true_lines, team_lines):
    """Score a submission and return {'score': ..., 'details': ...}.

    true_lines -- iterable of JSON strings with the true salaries.
    team_lines -- iterable of JSON strings with the predicted salaries,
                  paired with `true_lines` position by position.

    For each bound, only lines whose true value is truthy are counted, and
    only those where the team value is truthy too contribute an error. The
    score is the mean of the two root-mean errors and is computed when at
    least half of the countable lines were predicted for either bound;
    otherwise it stays at 1.

    Raises ValueError when the ids of a pair of lines differ.
    """
    result = {'score': 1, 'details': 'failed'}
    totals = dict.fromkeys(BOUNDS, 0)         # lines with a true value
    scored = dict.fromkeys(BOUNDS, 0)         # ... that the team predicted
    error_sums = dict.fromkeys(BOUNDS, 0.0)

    for true_line, team_line in izip(true_lines, team_lines):
        trdata = json.loads(true_line)
        tmdata = json.loads(team_line)
        if trdata['id'] != tmdata['id']:
            # Raise a real exception; raising a bare string is a TypeError.
            raise ValueError('Ids mismatch error!')
        for bound in BOUNDS:
            true_value = trdata['salary'][bound]
            if not true_value:
                continue
            totals[bound] += 1
            team_value = tmdata['salary'][bound]
            if not team_value:
                continue
            scored[bound] += 1
            error_sums[bound] += relative_squared_error(true_value, team_value)

    predicted_to = _fraction(scored['to'], totals['to'])
    predicted_from = _fraction(scored['from'], totals['from'])

    # NOTE(review): `or` lets a submission through when only one bound is
    # predicted; the unpredicted bound is then charged WORST_ERROR rather
    # than dividing by zero. Confirm whether `and` was intended.
    if predicted_to >= 0.5 or predicted_from >= 0.5:
        to_error = _root_mean_error(error_sums['to'], scored['to'])
        from_error = _root_mean_error(error_sums['from'], scored['from'])
        result['score'] = (to_error + from_error) / 2
        result['details'] = 'Average relative error for "to" – %s, "from" – %s. Predicted "to" lines – %s, "from" – %s' % (to_error, from_error, predicted_to, predicted_from)
    else:
        result['details'] = 'Not enough to/from lines predicted'
    return result


def main():
    """Score the files named by `true_path` / `team_path` and print the result."""
    # `with` closes both files even when scoring raises.
    with open(true_path) as true_file, open(team_path) as team_file:
        result = score_salaries(true_file, team_file)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(repr(result))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment