Last active
October 7, 2015 14:48
-
-
Save ololobus/116d36585a4fc08b3a79 to your computer and use it in GitHub Desktop.
Scoring scripts for Data Science Week 2015 Datathon in Moscow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Check a team's ozon_recoms.txt for lines with too many recommendations.

Usage: pass the team number as the first command-line argument.  The file
``/users/team<N>/ozon_recoms.txt`` is read line by line (one JSON object
per line) and ``Badass detected!`` is printed as soon as one line carries
more than 1000 recommendations; otherwise ``OK`` is printed.
"""
import sys
import json

# Largest number of recommendations a single line may carry.
MAX_RECOMS = 1000


def has_oversized_recoms(lines, limit=MAX_RECOMS):
    """Return True if any JSON line has more than *limit* recommendations.

    Args:
        lines: iterable of strings, each a JSON object with a 'recoms'
            collection.
        limit: maximum allowed size of 'recoms' on one line.

    Returns:
        True as soon as one oversized line is found (remaining lines are
        not read), False when every line is within the limit.

    Raises:
        ValueError: a line is not valid JSON.
        KeyError: a line has no 'recoms' key.
    """
    for line in lines:
        if len(json.loads(line)['recoms']) > limit:
            return True
    return False


def main(argv):
    """Run the check for the team number given in ``argv[1]``.

    Returns the process exit status: 0 after a completed check, 2 when
    the team number is missing.
    """
    if len(argv) < 2:
        # Without this guard a missing argument ends in a bare IndexError.
        sys.stderr.write('Usage: pass the team number as the first argument\n')
        return 2
    team_path = '/users/team%s/ozon_recoms.txt' % argv[1]
    with open(team_path) as f:
        badass_detected = has_oversized_recoms(f)
    if badass_detected:
        print('Badass detected!')
    else:
        print('OK')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Score a recommendations submission by mean NDCG.

Reads the reference file ``true_recoms.txt`` and the submission
``recoms.txt`` (one JSON object per line) and prints ``repr`` of a dict
with the keys 'score' and 'details'.
"""
import sys
import json
import math

true_path = 'true_recoms.txt'
team_path = 'recoms.txt'


def discounted_gain(gain, rank):
    """Return *gain* discounted for a 1-based *rank*.

    Rank 1 is not discounted; every later rank is divided by log2(rank).
    """
    if rank == 1:
        return gain
    return gain / float(math.log(rank, 2))


def ndcg(true_recoms, team_recoms):
    """Return the NDCG of one item's submitted recommendations.

    Args:
        true_recoms: dict mapping recommendation -> true numeric gain.
        team_recoms: dict mapping recommendation -> submitted score; the
            scores are only used to order the recommendations.

    Returns:
        DCG of the submitted ordering divided by the ideal DCG.  When the
        ideal DCG is zero (no true recommendations, or all gains zero) the
        result is 0.0 instead of a ZeroDivisionError.
    """
    ideal_order = sorted(true_recoms.items(),
                         key=lambda pair: float(pair[1]), reverse=True)
    team_order = sorted(team_recoms.items(),
                        key=lambda pair: float(pair[1]), reverse=True)

    idcg = 0
    for rank, (_, gain) in enumerate(ideal_order, 1):
        idcg += discounted_gain(gain, rank)

    dcg = 0
    for rank, (recom, _) in enumerate(team_order, 1):
        # Only recommendations present in the reference contribute gain.
        if recom in true_recoms:
            dcg += discounted_gain(true_recoms[recom], rank)

    if not idcg:
        return 0.0
    # float() keeps Python 2 from flooring when both sums are integers.
    return dcg / float(idcg)


def score_recoms(true_lines, team_lines):
    """Score a whole submission.

    Args:
        true_lines: iterable of JSON lines with 'item' and 'true_recoms'.
        team_lines: iterable of JSON lines with 'item' and 'recoms'.

    Returns:
        A dict with 'score' (mean NDCG over the submitted lines, or 0) and
        'details' ('ok', 'Wrong lines number', or 'failed' when there was
        nothing to score).

    Raises:
        KeyError: a submitted item does not exist in the reference, or a
            line lacks an expected key.
        ValueError: a line is not valid JSON.
    """
    result = {'score': 0, 'details': 'failed'}

    true_recs = {}
    for line in true_lines:
        data = json.loads(line)
        true_recs[data['item']] = data['true_recoms']

    ndcg_sum = 0
    lines_number = 0
    for line in team_lines:
        data = json.loads(line)
        lines_number += 1
        ndcg_sum += ndcg(true_recs[data['item']], data['recoms'])

    if lines_number != len(true_recs):
        result['details'] = 'Wrong lines number'
    elif lines_number:
        result['details'] = 'ok'
        result['score'] = ndcg_sum / float(lines_number)
    return result


def main():
    """Score the files named by ``true_path`` and ``team_path``."""
    with open(true_path) as true_file:
        with open(team_path) as team_file:
            result = score_recoms(true_file, team_file)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(repr(result))


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Score predicted salary bounds against the reference salaries.

Reads ``true_hh_salaries.txt`` and ``hh_salaries.txt`` in lockstep (one
JSON object per line, each with an 'id' and a 'salary' holding 'from' and
'to') and prints ``repr`` of a dict with the keys 'score' and 'details'.
A lower score is better; 1 is the failure value.
"""
import sys
import json
import operator
import math

try:
    from itertools import izip
except ImportError:
    # Python 3 has no izip; the built-in zip is already lazy there.
    izip = zip

true_path = 'true_hh_salaries.txt'
team_path = 'hh_salaries.txt'


def squared_relative_error(true_value, team_value):
    """Return (min/max - 1)**2 for the two values taken as floats."""
    true_value = float(true_value)
    team_value = float(team_value)
    ratio = min(true_value, team_value) / max(true_value, team_value)
    return abs(ratio - 1) ** 2


def root_mean_error(error_sum, count):
    """Return sqrt(error_sum / count), or 1.0 when *count* is zero.

    A bound without a single prediction has no measurable error; it gets
    1.0, the same value the result uses for a failed submission, instead
    of crashing with a ZeroDivisionError.
    """
    if not count:
        return 1.0
    return math.sqrt(error_sum / float(count))


def score_salaries(true_lines, team_lines):
    """Score a salary submission.

    Args:
        true_lines: iterable of reference JSON lines.
        team_lines: iterable of submitted JSON lines, in the same order.

    Returns:
        A dict with 'score' and 'details'.  The score is the mean of the
        "to" and "from" errors when at least half of the reference values
        of either bound were predicted; otherwise it stays 1.

    Raises:
        ValueError: the ids of a line pair differ, or a line is not JSON.
        KeyError: a line lacks 'id', 'salary' or a needed bound.
        ZeroDivisionError: the compared lines hold no reference "to" or no
            reference "from" value at all.
    """
    # NOTE(review): the pairing stops at the shorter input, so lines missing
    # from a shorter submission are ignored rather than penalised - confirm
    # this is intended.
    result = {'score': 1, 'details': 'failed'}
    bounds = ('to', 'from')
    error_sum = dict.fromkeys(bounds, 0)
    lines_number = dict.fromkeys(bounds, 0)
    total = dict.fromkeys(bounds, 0)

    for true_line, team_line in izip(true_lines, team_lines):
        trdata = json.loads(true_line)
        tmdata = json.loads(team_line)
        if trdata['id'] != tmdata['id']:
            # Raising a plain string is itself a TypeError; use a real
            # exception so the message reaches the operator.
            raise ValueError('Ids mismatch error!')
        for bound in bounds:
            if not trdata['salary'][bound]:
                continue
            total[bound] += 1
            if tmdata['salary'][bound]:
                lines_number[bound] += 1
                error_sum[bound] += squared_relative_error(
                    trdata['salary'][bound], tmdata['salary'][bound])

    predicted_to = lines_number['to'] / float(total['to'])
    predicted_from = lines_number['from'] / float(total['from'])

    if predicted_to >= 0.5 or predicted_from >= 0.5:
        to_error = root_mean_error(error_sum['to'], lines_number['to'])
        from_error = root_mean_error(error_sum['from'], lines_number['from'])
        result['score'] = (to_error + from_error) / 2
        result['details'] = 'Average relative error for "to" – %s, "from" – %s. Predicted "to" lines – %s, "from" – %s' % (to_error, from_error, predicted_to, predicted_from)
    else:
        result['details'] = 'Not enough to/from lines predicted'
    return result


def main():
    """Score the files named by ``true_path`` and ``team_path``."""
    # The with-block closes both files even when scoring raises.
    with open(true_path) as true_file:
        with open(team_path) as team_file:
            result = score_salaries(true_file, team_file)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(repr(result))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment