Last active
March 28, 2023 14:26
-
-
Save niklasf/933053f76a418cfe4d52f2f1af997c6f to your computer and use it in GitHub Desktop.
Experiment with PGN compression. Usage: python pgn-codebook.py corpus.pgn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import chess.pgn | |
import huffman | |
import sys | |
# https://github.com/flok99/feeks/blob/master/psq.py | |
PSQT = { | |
chess.PAWN: [ | |
0, 0, 0, 0, 0, 0, 0, 0, | |
50, 50, 50, 50, 50, 50, 50, 50, | |
10, 10, 20, 30, 30, 20, 10, 10, | |
5, 5, 10, 25, 25, 10, 5, 5, | |
0, 0, 0, 20, 21, 0, 0, 0, | |
5, -5,-10, 0, 0,-10, -5, 5, | |
5, 10, 10,-31,-31, 10, 10, 5, | |
0, 0, 0, 0, 0, 0, 0, 0 | |
], | |
chess.KNIGHT: [ | |
-50,-40,-30,-30,-30,-30,-40,-50, | |
-40,-20, 0, 0, 0, 0,-20,-40, | |
-30, 0, 10, 15, 15, 10, 0,-30, | |
-30, 5, 15, 20, 20, 15, 5,-30, | |
-30, 0, 15, 20, 20, 15, 0,-30, | |
-30, 5, 10, 15, 15, 11, 5,-30, | |
-40,-20, 0, 5, 5, 0,-20,-40, | |
-50,-40,-30,-30,-30,-30,-40,-50 | |
], | |
chess.BISHOP: [ | |
-20,-10,-10,-10,-10,-10,-10,-20, | |
-10, 0, 0, 0, 0, 0, 0,-10, | |
-10, 0, 5, 10, 10, 5, 0,-10, | |
-10, 5, 5, 10, 10, 5, 5,-10, | |
-10, 0, 10, 10, 10, 10, 0,-10, | |
-10, 10, 10, 10, 10, 10, 10,-10, | |
-10, 5, 0, 0, 0, 0, 5,-10, | |
-20,-10,-10,-10,-10,-10,-10,-20 | |
], | |
chess.ROOK: [ | |
0, 0, 0, 0, 0, 0, 0, 0, | |
5, 10, 10, 10, 10, 10, 10, 5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
-5, 0, 0, 0, 0, 0, 0, -5, | |
0, 0, 0, 5, 5, 0, 0, 0 | |
], | |
chess.QUEEN: [ | |
-20,-10,-10, -5, -5,-10,-10,-20, | |
-10, 0, 0, 0, 0, 0, 0,-10, | |
-10, 0, 5, 5, 5, 5, 0,-10, | |
-5, 0, 5, 5, 5, 5, 0, -5, | |
0, 0, 5, 5, 5, 5, 0, -5, | |
-10, 5, 5, 5, 5, 5, 0,-10, | |
-10, 0, 5, 0, 0, 0, 0,-10, | |
-20,-10,-10, -5, -5,-10,-10,-20 | |
], | |
chess.KING: [ | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-30,-40,-40,-50,-50,-40,-40,-30, | |
-20,-30,-30,-40,-40,-30,-30,-20, | |
-10,-20,-20,-20,-20,-20,-20,-10, | |
20, 20, 0, 0, 0, 0, 20, 20, | |
1, 30, 10, 0, 0, 10, 30, 2 | |
] | |
} | |
def piece_value(color, piece_type, square): | |
return PSQT[piece_type][square if color == chess.BLACK else chess.square_mirror(square)] | |
def move_value(board, move): | |
piece_type = board.piece_type_at(move.from_square) | |
return piece_value(board.turn, piece_type, move.to_square) - piece_value(board.turn, piece_type, move.from_square) | |
def sorted_legals(board): | |
legals = list(board.legal_moves) | |
# More frequent moves should go first | |
legals.sort(reverse=True, key=lambda m: ( | |
m.promotion or 0, | |
board.is_capture(m), | |
move_value(board, m), | |
m.to_square, | |
m.from_square, | |
)) | |
return legals | |
class HistogramVisitor(chess.pgn.BaseVisitor): | |
def __init__(self): | |
super().__init__() | |
self.histogram = { i: 0 for i in range(256) } | |
def visit_move(self, board, move): | |
legals = sorted_legals(board) | |
idx = legals.index(move) | |
self.histogram[idx] += 1 | |
def bias(histogram): | |
for k, v in histogram.items(): | |
yield k, v + 1 | |
with open(sys.argv[1]) as pgn: | |
visitor = HistogramVisitor() | |
num_games = 0 | |
while chess.pgn.read_game(pgn, Visitor=lambda: visitor) and num_games < 1000: | |
num_games += 1 | |
code = list(sorted(huffman.codebook(bias(visitor.histogram)).values(), key=len)) | |
bits = sum(len(code[k]) * visitor.histogram[k] for k in range(256)) | |
for i in range(0, 256): | |
print(" new Symbol(0b{}, {}), // {}".format(code[i], len(code[i]), i)) | |
print(" //", bits / 8 / num_games, "bytes per game") | |
print(" //", bits / 8 / sum(visitor.histogram.values()), "bytes per move") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment