Created
February 11, 2016 18:13
-
-
Save Ladsgroup/6f54e24d3f3d1deebec4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
``time_scorer -h`` | |
:: | |
Times requests to the ORES scoring service. This utility reads revision
IDs (one per line) and prints how long each scoring request took.
Usage: | |
time_scorer -h | --help | |
time_scorer <wiki> [--revs=<path>] [--model=<model>] [--batch] | |
Options: | |
-h --help Prints this documentation | |
<wiki> Wiki DB | |
--revs=<path>    Path to a file containing one revision ID per line
                 [default: <stdin>]
--model=<model> Model [default: reverted] | |
--batch Batch | |
""" | |
import logging | |
import sys | |
import time | |
import urllib.request | |
import docopt | |
from itertools import zip_longest | |
def grouper(n, iterable, padvalue=None):
    """Collect items from *iterable* into fixed-length tuples of size *n*.

    The final tuple is padded with *padvalue* when the number of items is
    not a multiple of *n*.  The result is a lazy ``zip_longest`` iterator.
    """
    # Every column refers to the *same* iterator, so each output tuple
    # consumes n consecutive items from the input.
    shared = iter(iterable)
    columns = (shared,) * n
    return zip_longest(*columns, fillvalue=padvalue)
def main(argv=None):
    """Parse command-line arguments and start the timing run.

    Args:
        argv: Argument list handed to docopt; ``None`` lets docopt fall
            back to its own default.

    The revision source is ``sys.stdin`` when ``--revs`` holds the
    ``<stdin>`` default, otherwise the file at the given path.  That file
    is closed once the run finishes, even if the run raises.
    """
    args = docopt.docopt(__doc__, argv=argv)
    wiki = args['<wiki>']
    model = args['--model']
    batch = args['--batch']
    if args['--revs'] == "<stdin>":
        run(wiki, sys.stdin, model, batch)
    else:
        # Context manager guarantees the handle is released; the original
        # left the file open for the lifetime of the process.
        with open(args['--revs'], 'r') as revs:
            run(wiki, revs, model, batch)
def run(wiki, revs, model, batch):
    """Time scoring requests for a stream of revision IDs.

    Entries from *revs* are grouped into batches; each batch is sent as a
    single HTTP request to the scores URL built from *wiki* and *model*.
    For every revision ID in a batch one line is printed containing the
    ID, the elapsed seconds of that batch's request, and an error note
    (empty when the request succeeded).

    Args:
        wiki: Wiki DB name, inserted into the URL path.
        revs: Iterable of strings, each expected to hold one integer
            revision ID.  Entries that do not parse as an integer are
            skipped.
        model: Model name, sent as the ``models`` query parameter.
        batch: If truthy, 50 revision IDs are sent per request;
            otherwise one per request.
    """
    base_url = "http://ores.wmflabs.org/scores/{wiki}/?models={model}&revids=".format(wiki=wiki, model=model)
    batch_size = 50 if batch else 1
    print('Batch size: {0}'.format(batch_size))
    for rev_ids in grouper(batch_size, revs):
        res_id = []
        for i in rev_ids:
            if i is None:
                # Padding inserted by grouper() to fill the last batch.
                continue
            try:
                int(i)
            except (TypeError, ValueError):
                # Not an integer revision ID (blank line, header, ...).
                # Narrow catch so Ctrl-C is no longer swallowed here.
                continue
            res_id.append(i.strip())
        if not res_id:
            continue
        url = base_url + '|'.join(res_id)
        notes = ''
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        try:
            # Read the full body so the timing includes the transfer, and
            # close the connection once done.
            with urllib.request.urlopen(url) as response:
                response.read()
        except Exception as e:
            # Best effort: record the failure and keep timing the rest.
            notes += 'error:' + str(e)
        process_time = time.perf_counter() - start_time
        for rev_id in res_id:
            print(rev_id, process_time, notes)
# Only run the command-line entry point when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment