-
-
Save necaris/316f63e7ea33ca6229a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspired by http://aadrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html
# but trying to do the same set of tasks using Python distributed-computing
# tools, i.e. dask.
# Also based heavily on the examples in the documentation at http://dask.pydata.org/en/latest/bag.html
import dask.bag | |
# The result of each match is found in a line formatted '[Result "W-B"]',
# where W and B are the scores of the White and Black players: each is
# 1, 0, or 1/2, representing a win, a loss, or a draw respectively.
def only_result_lines(l):
    '''Return True only for lines that carry a game's Result tag.

    A matching line starts with the literal text '[Result ' -- the opening
    bracket, the tag name, and the space that separates it from the quoted
    value. Checking the whole prefix (rather than just characters 1-6)
    rejects lines that merely contain 'Result' at that offset without the
    bracket, as well as longer tag names that only begin with 'Result'.

    `l` is a single line of text; any trailing line terminator is ignored
    because only the start of the line is inspected.'''
    return l.startswith('[Result ')
def extract_result_value(l):
    '''Map a '[Result "W-B"]' line to a one-letter outcome key.

    Returns 'W' if White won, 'B' if Black won, 'D' for a draw, or '-' when
    the outcome cannot be determined (for example an unfinished game marked
    '*', or a line without a quoted value).

    The value is taken from between the first pair of double quotes rather
    than from fixed character offsets, so the result is the same whether the
    line still ends in '\\r\\n', ends in a bare '\\n', or has had its
    terminator stripped already.'''
    # Isolate the quoted value: everything after the first '"' up to the next.
    _, opening, remainder = l.partition('"')
    value, closing, _ = remainder.partition('"')
    if not (opening and closing):
        # No complete quoted value on this line -- nothing to classify.
        return '-'
    results = value.split('-')
    if len(results) != 2:
        return '-'
    w, b = results
    if w == '1':
        return 'W'
    elif b == '1':
        return 'B'
    elif w == b:
        # Equal scores on both sides (normally '1/2-1/2') count as a draw.
        return 'D'
    else:
        return '-'
# Read every PGN file under data/ as one bag of text lines.
# NOTE(review): from_filenames looks like the older dask.bag entry point;
# newer releases appear to expose read_text instead -- confirm against the
# installed dask version before changing the call.
pgn_lines = dask.bag.from_filenames("data/*.pgn", encoding='iso8859-1', linesep='\r\n')

# Keep only the Result lines, reduce each to its outcome key, and count how
# often each key occurs. Nothing is evaluated until compute() is called.
outcome_counts = (
    pgn_lines
    .filter(only_result_lines)
    .map(extract_result_value)
    .frequencies()
)
result = outcome_counts.compute()

# Report one "key: count" line per outcome, followed by the grand total.
print("win-loss ratio:")
total = 0
for outcome, count in result:
    print("{}: {}".format(outcome, count))
    total = total + count
print("total games:", total)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment