Create a gist now

Instantly share code, notes, and snippets.

@necaris /win-loss.py Secret
Created Mar 22, 2016

What would you like to do?
# Inspired by http://aadrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html
# but trying to do the same set of tasks using Python distributed-computing
# tools i.e. dask
# Also based heavily on the examples in the documentation at http://dask.pydata.org/en/latest/bag.html
import dask.bag
# The result of the match is found in the line formatted '[Result "W-B"]',
# where W and B are the scores of the White and Black players respectively:
# 1 for a win, 0 for a loss, or 1/2 for a draw.
def only_result_lines(l):
    '''Filter predicate: keep a line only if it is a result line.

    A line qualifies when the six characters after its first character
    spell 'Result' (as in '[Result "1-0"]'). Lines too short to contain
    that tag simply compare unequal and are rejected.'''
    tag_name = l[1:7]
    return tag_name == 'Result'
def extract_result_value(l):
    '''Classify one result line as a White win, Black win, or draw.

    The line is expected to look like '[Result "W-B"]', optionally followed
    by a line terminator. The score is taken from between the first pair of
    double quotes, so the answer does not depend on whether the line ends in
    '\\r\\n', '\\n', or nothing at all (a fixed-offset slice would mis-read
    the score for anything but '\\r\\n').

    Returns:
        'W' if White's score is '1',
        'B' if Black's score is '1',
        'D' if both scores are the same string (e.g. '1/2-1/2'),
        '-' if the result cannot be determined: no quoted value, a value
        that is not two '-'-separated fields (e.g. '*'), or differing
        scores where neither is '1'.'''
    # Everything after the opening quote, then everything up to the closing
    # quote. A missing opening quote leaves `rest` empty, which in turn
    # leaves `closing_quote` empty, so one check covers both cases.
    _, _, rest = l.partition('"')
    value, closing_quote, _ = rest.partition('"')
    if not closing_quote:
        return '-'

    results = value.split('-')
    if len(results) != 2:
        return '-'
    w, b = results
    if w == '1':
        return 'W'
    elif b == '1':
        return 'B'
    elif w == b:
        # NOTE(review): any pair of equal scores lands here, not only
        # '1/2-1/2' (e.g. '0-0'); kept as in the original logic.
        return 'D'
    else:
        return '-'
# Describe the whole pipeline up front: read every PGN file under data/ line
# by line, keep only the result lines, reduce each one to its W/B/D/- key,
# and tally how often each key occurs.
b = dask.bag.from_filenames("data/*.pgn", encoding='iso8859-1', linesep='\r\n')
win_loss = (
    b.filter(only_result_lines)
    .map(extract_result_value)
    .frequencies()
)

# Run the pipeline; the result is iterated below as (key, count) pairs.
result = win_loss.compute()

# Report the per-outcome counts, summing them into the overall game count.
print("win-loss ratio:")
total = 0
for outcome, count in result:
    print("{}: {}".format(outcome, count))
    total += count
print("total games:", total)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment