Skip to content

Instantly share code, notes, and snippets.

@niklasf niklasf/pgn-codebook.py
Last active Jan 19, 2018

Embed
What would you like to do?
Experiment with PGN compression. Usage: python pgn-codebook.py corpus.pgn
import chess.pgn
import huffman
import sys
# https://github.com/flok99/feeks/blob/master/psq.py
PSQT = {
chess.PAWN: [
0, 0, 0, 0, 0, 0, 0, 0,
50, 50, 50, 50, 50, 50, 50, 50,
10, 10, 20, 30, 30, 20, 10, 10,
5, 5, 10, 25, 25, 10, 5, 5,
0, 0, 0, 20, 21, 0, 0, 0,
5, -5,-10, 0, 0,-10, -5, 5,
5, 10, 10,-31,-31, 10, 10, 5,
0, 0, 0, 0, 0, 0, 0, 0
],
chess.KNIGHT: [
-50,-40,-30,-30,-30,-30,-40,-50,
-40,-20, 0, 0, 0, 0,-20,-40,
-30, 0, 10, 15, 15, 10, 0,-30,
-30, 5, 15, 20, 20, 15, 5,-30,
-30, 0, 15, 20, 20, 15, 0,-30,
-30, 5, 10, 15, 15, 11, 5,-30,
-40,-20, 0, 5, 5, 0,-20,-40,
-50,-40,-30,-30,-30,-30,-40,-50
],
chess.BISHOP: [
-20,-10,-10,-10,-10,-10,-10,-20,
-10, 0, 0, 0, 0, 0, 0,-10,
-10, 0, 5, 10, 10, 5, 0,-10,
-10, 5, 5, 10, 10, 5, 5,-10,
-10, 0, 10, 10, 10, 10, 0,-10,
-10, 10, 10, 10, 10, 10, 10,-10,
-10, 5, 0, 0, 0, 0, 5,-10,
-20,-10,-10,-10,-10,-10,-10,-20
],
chess.ROOK: [
0, 0, 0, 0, 0, 0, 0, 0,
5, 10, 10, 10, 10, 10, 10, 5,
-5, 0, 0, 0, 0, 0, 0, -5,
-5, 0, 0, 0, 0, 0, 0, -5,
-5, 0, 0, 0, 0, 0, 0, -5,
-5, 0, 0, 0, 0, 0, 0, -5,
-5, 0, 0, 0, 0, 0, 0, -5,
0, 0, 0, 5, 5, 0, 0, 0
],
chess.QUEEN: [
-20,-10,-10, -5, -5,-10,-10,-20,
-10, 0, 0, 0, 0, 0, 0,-10,
-10, 0, 5, 5, 5, 5, 0,-10,
-5, 0, 5, 5, 5, 5, 0, -5,
0, 0, 5, 5, 5, 5, 0, -5,
-10, 5, 5, 5, 5, 5, 0,-10,
-10, 0, 5, 0, 0, 0, 0,-10,
-20,-10,-10, -5, -5,-10,-10,-20
],
chess.KING: [
-30,-40,-40,-50,-50,-40,-40,-30,
-30,-40,-40,-50,-50,-40,-40,-30,
-30,-40,-40,-50,-50,-40,-40,-30,
-30,-40,-40,-50,-50,-40,-40,-30,
-20,-30,-30,-40,-40,-30,-30,-20,
-10,-20,-20,-20,-20,-20,-20,-10,
20, 20, 0, 0, 0, 0, 20, 20,
1, 30, 10, 0, 0, 10, 30, 2
]
}
def piece_value(color, piece_type, square):
return PSQT[piece_type][square if color == chess.BLACK else chess.square_mirror(square)]
def move_value(board, move):
piece_type = board.piece_type_at(move.from_square)
return piece_value(board.turn, piece_type, move.to_square) - piece_value(board.turn, piece_type, move.from_square)
def sorted_legals(board):
legals = list(board.legal_moves)
# More frequent moves should go first
legals.sort(reverse=True, key=lambda m: (
m.promotion or 0,
board.is_capture(m),
move_value(board, m),
m.to_square,
m.from_square,
))
return legals
class HistogramVisitor(chess.pgn.BaseVisitor):
def __init__(self):
super().__init__()
self.histogram = { i: 0 for i in range(256) }
def visit_move(self, board, move):
legals = sorted_legals(board)
idx = legals.index(move)
self.histogram[idx] += 1
def bias(histogram):
for k, v in histogram.items():
yield k, v + 1
with open(sys.argv[1]) as pgn:
visitor = HistogramVisitor()
num_games = 0
while chess.pgn.read_game(pgn, Visitor=lambda: visitor) and num_games < 1000:
num_games += 1
code = list(sorted(huffman.codebook(bias(visitor.histogram)).values(), key=len))
bits = sum(len(code[k]) * visitor.histogram[k] for k in range(256))
for i in range(0, 256):
print(" new Symbol(0b{}, {}), // {}".format(code[i], len(code[i]), i))
print(" //", bits / 8 / num_games, "bytes per game")
print(" //", bits / 8 / sum(visitor.histogram.values()), "bytes per move")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.