necaris/win-loss.py Secret

## win-loss.py
# Inspired by http://aadrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html
# but trying to do the same set of tasks using Python distributed-computing
# tools i.e. dask
# Also based heavily on the examples in the documentation at http://dask.pydata.org/en/latest/bag.html
import dask.bag

# The result of the match is found in the line formatted '[Result "W-B"]',
# where W and B are the scores of the White and Black players respectively:
# 1 for a win, 0 for a loss, or 1/2 for a draw.

def only_result_lines(l):
    '''Tell whether a line from a PGN file is a "Result" tag line.

    The tag name is expected immediately after the first character of the
    line (the opening bracket in '[Result "W-B"]'); the first character
    itself is not inspected.
    '''
    return l.startswith('Result', 1)

def extract_result_value(l):
    '''Classify a '[Result "W-B"]' line as a win for White, Black, or a draw.

    The score is taken from between the first and last double quote on the
    line, so the answer does not depend on which line terminator (if any) is
    still attached to the line.

    Returns 'W' if White's score is '1', 'B' if Black's score is '1', 'D' if
    both scores are equal, and '-' if the outcome cannot be determined (for
    example a game recorded as '*', or a line with no quoted value).
    '''
    start = l.find('"')
    end = l.rfind('"')
    if start == -1 or end == start:
        # No quoted value at all, or only a single stray quote.
        return '-'

    results = l[start + 1:end].split('-')
    if len(results) != 2:
        return '-'

    w, b = results
    if w == '1':
        return 'W'
    elif b == '1':
        return 'B'
    elif w == b:
        return 'D'
    else:
        return '-'


# Chain the whole pipeline: read every PGN file line by line, keep only the
# Result tag lines, classify each one, count how often each outcome occurs,
# and run it with compute() to get the (outcome, count) pairs printed below.
# NOTE(review): from_filenames was superseded by dask.bag.read_text in later
# dask releases -- confirm against the installed version.
result = (
    dask.bag.from_filenames("data/*.pgn", encoding='iso8859-1', linesep='\r\n')
    .filter(only_result_lines)
    .map(extract_result_value)
    .frequencies()
    .compute()
)

print("win-loss ratio:")
total = 0
for outcome, count in result:
    print("{}: {}".format(outcome, count))
    total += count
print("total games:", total)
	# Inspired by http://aadrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html
	# but trying to do the same set of tasks using Python distributed-computing
	# tools i.e. dask
	# Also based heavily on the examples in the documentation at http://dask.pydata.org/en/latest/bag.html
	import dask.bag

	# The result of the match is found in the line formatted '[Result "W-B"]',
	# where W and B are the scores of the White and Black players respectively:
	# 1 for a win, 0 for a loss, or 1/2 for a draw.

	def only_result_lines(l):
	    '''Tell whether a line from a PGN file is a "Result" tag line.

	    Compares the six characters after the first one (the opening bracket
	    in '[Result "W-B"]') with the tag name; the first character itself is
	    not inspected.
	    '''
	    return l[1:7] == 'Result'

	def extract_result_value(l):
	    '''Classify a '[Result "W-B"]' line as a win for White, Black, or a draw.

	    The score is taken from between the first and last double quote on the
	    line, so the answer does not depend on which line terminator (if any)
	    is still attached to the line.

	    Returns 'W' if White's score is '1', 'B' if Black's score is '1', 'D'
	    if both scores are equal, and '-' if the outcome cannot be determined
	    (for example a game recorded as '*', or a line with no quoted value).
	    '''
	    start = l.find('"')
	    end = l.rfind('"')
	    if start == -1 or end == start:
	        # No quoted value at all, or only a single stray quote.
	        return '-'

	    results = l[start + 1:end].split('-')
	    if len(results) != 2:
	        return '-'

	    w, b = results
	    if w == '1':
	        return 'W'
	    elif b == '1':
	        return 'B'
	    elif w == b:
	        return 'D'
	    else:
	        return '-'


	# Read every PGN file line by line, keep only the Result tag lines,
	# classify each one, and count how often each outcome occurs.
	b = dask.bag.from_filenames("data/*.pgn", encoding='iso8859-1', linesep='\r\n')
	result_lines = b.filter(only_result_lines)
	result_values = result_lines.map(extract_result_value)
	win_loss = result_values.frequencies()
	result = win_loss.compute()

	# Report the count for each outcome, then the overall number of games.
	print("win-loss ratio:")
	total = 0
	for line in result:
	    # Each entry is unpacked into the "{}: {}" template; its second
	    # element is the count added to the running total.
	    print("{}: {}".format(*line))
	    total += line[1]
	print("total games:", total)