Last active
March 28, 2023 14:26
Experiment with PGN compression. Usage: python pgn-codebook.py corpus.pgn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import chess.pgn | |
import huffman | |
import sys | |
# https://github.com/flok99/feeks/blob/master/psq.py | |
PSQT = { | |
chess.PAWN: [ | |
0, 0, 0, 0, 0, 0, 0, 0, | |
50, 50, 50, 50, 50, 50, 50, 50, | |
10, 10, 20, 30, 30, 20, 10, 10, | |
5, 5, 10, 25, 25, 10, 5, 5, | |
0, 0, 0, 20, 21, 0, 0, 0, | |
5, -5,-10, 0, 0,-10, -5, 5, | |
5, 10, 10,-31,-31, 10, 10, 5, | |
0, 0, 0, 0, 0, 0, 0, 0 | |
], | |
chess.KNIGHT: [ | |
-50,-40,-30,-30,-30,-30,-40,-50, | |
-40,-20, 0, 0, 0, 0,-20,-40, | |
-30, 0, 10, 15, 15, 10, 0,-30, | |
-30, 5, 15, 20, 20, 15, 5,-30, | |
-30, 0, 15, 20, 20, 15, 0,-30, | |
-30, 5, 10, 15, 15, 11, 5,-30, | |
-40,-20, 0, 5, 5, 0,-20,-40, | |
-50,-40,-30,-30,-30,-30,-40,-50 | |
], | |
chess.BISHOP: [ | |
-20,-10,-10,-10,-10,-10,-10,-20, | |
-10, 0, 0, 0, 0, 0, 0,-10, | |
-10, 0, 5, 10, 10, 5, 0,-10, | |
-10, 5, 5, 10, 10, 5, 5,-10, | |
-10, 0, 10, 10, 10, 10, 0,-10, | |
-10, 10, 10, 10, 10, 10, 10,-10, | |
-10, 5, 0, 0, 0, 0, 5,-10, | |
-20,-10,-10,-10,-10,-10,-10,-20 | |
], | |
chess.ROOK: [ | |
0, 0, 0, 0, 0, 0, 0, 0, | |
5, 10, 10, 10, 10, 10, 10, 5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
0, 0, 0, 5, 5, 0, 0, 0 | |
], | |
chess.QUEEN: [ | |
-20,-10,-10, -5, -5,-10,-10,-20, | |
-10, 0, 0, 0, 0, 0, 0,-10, | |
-10, 0, 5, 5, 5, 5, 0,-10, | |
-5, 0, 5, 5, 5, 5, 0, -5, | |
0, 0, 5, 5, 5, 5, 0, -5, | |
-10, 5, 5, 5, 5, 5, 0,-10, | |
-10, 0, 5, 0, 0, 0, 0,-10, | |
-20,-10,-10, -5, -5,-10,-10,-20 | |
], | |
chess.KING: [ | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-20,-30,-30,-40,-40,-30,-30,-20, | |
-10,-20,-20,-20,-20,-20,-20,-10, | |
20, 20, 0, 0, 0, 0, 20, 20, | |
1, 30, 10, 0, 0, 10, 30, 2 | |
] | |
} | |
def piece_value(color, piece_type, square): | |
return PSQT[piece_type][square if color == chess.BLACK else chess.square_mirror(square)] | |
def move_value(board, move): | |
piece_type = board.piece_type_at(move.from_square) | |
return piece_value(board.turn, piece_type, move.to_square) - piece_value(board.turn, piece_type, move.from_square) | |
def sorted_legals(board): | |
legals = list(board.legal_moves) | |
# More frequent moves should go first | |
legals.sort(reverse=True, key=lambda m: ( | |
m.promotion or 0, | |
board.is_capture(m), | |
move_value(board, m), | |
m.to_square, | |
m.from_square, | |
)) | |
return legals | |
class HistogramVisitor(chess.pgn.BaseVisitor): | |
def __init__(self): | |
super().__init__() | |
self.histogram = { i: 0 for i in range(256) } | |
def visit_move(self, board, move): | |
legals = sorted_legals(board) | |
idx = legals.index(move) | |
self.histogram[idx] += 1 | |
def bias(histogram): | |
for k, v in histogram.items(): | |
yield k, v + 1 | |
with open(sys.argv[1]) as pgn: | |
visitor = HistogramVisitor() | |
num_games = 0 | |
while chess.pgn.read_game(pgn, Visitor=lambda: visitor) and num_games < 1000: | |
num_games += 1 | |
code = list(sorted(huffman.codebook(bias(visitor.histogram)).values(), key=len)) | |
bits = sum(len(code[k]) * visitor.histogram[k] for k in range(256)) | |
for i in range(0, 256): | |
print(" new Symbol(0b{}, {}), // {}".format(code[i], len(code[i]), i)) | |
print(" //", bits / 8 / num_games, "bytes per game") | |
print(" //", bits / 8 / sum(visitor.histogram.values()), "bytes per move") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment