niklasf/pgn-codebook.py

## pgn-codebook.py
import chess.pgn
import huffman
import sys

# https://github.com/flok99/feeks/blob/master/psq.py

PSQT = {
    chess.PAWN: [
         0,  0,  0,  0,  0,  0,  0,  0,
        50, 50, 50, 50, 50, 50, 50, 50,
        10, 10, 20, 30, 30, 20, 10, 10,
         5,  5, 10, 25, 25, 10,  5,  5,
         0,  0,  0, 20, 21,  0,  0,  0,
         5, -5,-10,  0,  0,-10, -5,  5,
         5, 10, 10,-31,-31, 10, 10,  5,
         0,  0,  0,  0,  0,  0,  0,  0
    ],
    chess.KNIGHT: [
        -50,-40,-30,-30,-30,-30,-40,-50,
        -40,-20,  0,  0,  0,  0,-20,-40,
        -30,  0, 10, 15, 15, 10,  0,-30,
        -30,  5, 15, 20, 20, 15,  5,-30,
        -30,  0, 15, 20, 20, 15,  0,-30,
        -30,  5, 10, 15, 15, 11,  5,-30,
        -40,-20,  0,  5,  5,  0,-20,-40,
        -50,-40,-30,-30,-30,-30,-40,-50
    ],
    chess.BISHOP: [
        -20,-10,-10,-10,-10,-10,-10,-20,
        -10,  0,  0,  0,  0,  0,  0,-10,
        -10,  0,  5, 10, 10,  5,  0,-10,
        -10,  5,  5, 10, 10,  5,  5,-10,
        -10,  0, 10, 10, 10, 10,  0,-10,
        -10, 10, 10, 10, 10, 10, 10,-10,
        -10,  5,  0,  0,  0,  0,  5,-10,
        -20,-10,-10,-10,-10,-10,-10,-20
    ],
    chess.ROOK: [
          0,  0,  0,  0,  0,  0,  0,  0,
          5, 10, 10, 10, 10, 10, 10,  5,
         -5,  0,  0,  0,  0,  0,  0, -5,
         -5,  0,  0,  0,  0,  0,  0, -5,
         -5,  0,  0,  0,  0,  0,  0, -5,
         -5,  0,  0,  0,  0,  0,  0, -5,
         -5,  0,  0,  0,  0,  0,  0, -5,
          0,  0,  0,  5,  5,  0,  0,  0
    ],
    chess.QUEEN: [
        -20,-10,-10, -5, -5,-10,-10,-20,
        -10,  0,  0,  0,  0,  0,  0,-10,
        -10,  0,  5,  5,  5,  5,  0,-10,
         -5,  0,  5,  5,  5,  5,  0, -5,
          0,  0,  5,  5,  5,  5,  0, -5,
        -10,  5,  5,  5,  5,  5,  0,-10,
        -10,  0,  5,  0,  0,  0,  0,-10,
        -20,-10,-10, -5, -5,-10,-10,-20
    ],
    chess.KING: [
        -30,-40,-40,-50,-50,-40,-40,-30,
        -30,-40,-40,-50,-50,-40,-40,-30,
        -30,-40,-40,-50,-50,-40,-40,-30,
        -30,-40,-40,-50,-50,-40,-40,-30,
        -20,-30,-30,-40,-40,-30,-30,-20,
        -10,-20,-20,-20,-20,-20,-20,-10,
         20, 20,  0,  0,  0,  0, 20, 20,
          1, 30, 10,  0,  0, 10, 30,  2
    ]
}

def piece_value(color, piece_type, square):
    return PSQT[piece_type][square if color == chess.BLACK else chess.square_mirror(square)]

def move_value(board, move):
    piece_type = board.piece_type_at(move.from_square)
    return piece_value(board.turn, piece_type, move.to_square) - piece_value(board.turn, piece_type, move.from_square)

def sorted_legals(board):
    legals = list(board.legal_moves)

    # More frequent moves should go first
    legals.sort(reverse=True, key=lambda m: (
        m.promotion or 0,
        board.is_capture(m),
        move_value(board, m),
        m.to_square,
        m.from_square,
    ))

    return legals

class HistogramVisitor(chess.pgn.BaseVisitor):
    def __init__(self):
        super().__init__()
        self.histogram = { i: 0 for i in range(256) }

    def visit_move(self, board, move):
        legals = sorted_legals(board)

        idx = legals.index(move)
        self.histogram[idx] += 1

def bias(histogram):
    for k, v in histogram.items():
        yield k, v + 1

with open(sys.argv[1]) as pgn:
    visitor = HistogramVisitor()

    num_games = 0
    while chess.pgn.read_game(pgn, Visitor=lambda: visitor) and num_games < 1000:
        num_games += 1

    code = list(sorted(huffman.codebook(bias(visitor.histogram)).values(), key=len))

    bits = sum(len(code[k]) * visitor.histogram[k] for k in range(256))

    for i in range(0, 256):
        print("        new Symbol(0b{}, {}), // {}".format(code[i], len(code[i]), i))

    print("    //", bits / 8 / num_games, "bytes per game")
    print("    //", bits / 8 / sum(visitor.histogram.values()), "bytes per move")
	import chess.pgn
	import huffman
	import sys

	# https://github.com/flok99/feeks/blob/master/psq.py

	PSQT = {
	chess.PAWN: [
	0, 0, 0, 0, 0, 0, 0, 0,
	50, 50, 50, 50, 50, 50, 50, 50,
	10, 10, 20, 30, 30, 20, 10, 10,
	5, 5, 10, 25, 25, 10, 5, 5,
	0, 0, 0, 20, 21, 0, 0, 0,
	5, -5,-10, 0, 0,-10, -5, 5,
	5, 10, 10,-31,-31, 10, 10, 5,
	0, 0, 0, 0, 0, 0, 0, 0
	],
	chess.KNIGHT: [
	-50,-40,-30,-30,-30,-30,-40,-50,
	-40,-20, 0, 0, 0, 0,-20,-40,
	-30, 0, 10, 15, 15, 10, 0,-30,
	-30, 5, 15, 20, 20, 15, 5,-30,
	-30, 0, 15, 20, 20, 15, 0,-30,
	-30, 5, 10, 15, 15, 11, 5,-30,
	-40,-20, 0, 5, 5, 0,-20,-40,
	-50,-40,-30,-30,-30,-30,-40,-50
	],
	chess.BISHOP: [
	-20,-10,-10,-10,-10,-10,-10,-20,
	-10, 0, 0, 0, 0, 0, 0,-10,
	-10, 0, 5, 10, 10, 5, 0,-10,
	-10, 5, 5, 10, 10, 5, 5,-10,
	-10, 0, 10, 10, 10, 10, 0,-10,
	-10, 10, 10, 10, 10, 10, 10,-10,
	-10, 5, 0, 0, 0, 0, 5,-10,
	-20,-10,-10,-10,-10,-10,-10,-20
	],
	chess.ROOK: [
	0, 0, 0, 0, 0, 0, 0, 0,
	5, 10, 10, 10, 10, 10, 10, 5,
	-5, 0, 0, 0, 0, 0, 0, -5,
	-5, 0, 0, 0, 0, 0, 0, -5,
	-5, 0, 0, 0, 0, 0, 0, -5,
	-5, 0, 0, 0, 0, 0, 0, -5,
	-5, 0, 0, 0, 0, 0, 0, -5,
	0, 0, 0, 5, 5, 0, 0, 0
	],
	chess.QUEEN: [
	-20,-10,-10, -5, -5,-10,-10,-20,
	-10, 0, 0, 0, 0, 0, 0,-10,
	-10, 0, 5, 5, 5, 5, 0,-10,
	-5, 0, 5, 5, 5, 5, 0, -5,
	0, 0, 5, 5, 5, 5, 0, -5,
	-10, 5, 5, 5, 5, 5, 0,-10,
	-10, 0, 5, 0, 0, 0, 0,-10,
	-20,-10,-10, -5, -5,-10,-10,-20
	],
	chess.KING: [
	-30,-40,-40,-50,-50,-40,-40,-30,
	-30,-40,-40,-50,-50,-40,-40,-30,
	-30,-40,-40,-50,-50,-40,-40,-30,
	-30,-40,-40,-50,-50,-40,-40,-30,
	-20,-30,-30,-40,-40,-30,-30,-20,
	-10,-20,-20,-20,-20,-20,-20,-10,
	20, 20, 0, 0, 0, 0, 20, 20,
	1, 30, 10, 0, 0, 10, 30, 2
	]
	}

	def piece_value(color, piece_type, square):
	return PSQT[piece_type][square if color == chess.BLACK else chess.square_mirror(square)]

	def move_value(board, move):
	piece_type = board.piece_type_at(move.from_square)
	return piece_value(board.turn, piece_type, move.to_square) - piece_value(board.turn, piece_type, move.from_square)

	def sorted_legals(board):
	legals = list(board.legal_moves)

	# More frequent moves should go first
	legals.sort(reverse=True, key=lambda m: (
	m.promotion or 0,
	board.is_capture(m),
	move_value(board, m),
	m.to_square,
	m.from_square,
	))

	return legals

	class HistogramVisitor(chess.pgn.BaseVisitor):
	def __init__(self):
	super().__init__()
	self.histogram = { i: 0 for i in range(256) }

	def visit_move(self, board, move):
	legals = sorted_legals(board)

	idx = legals.index(move)
	self.histogram[idx] += 1

	def bias(histogram):
	for k, v in histogram.items():
	yield k, v + 1

	with open(sys.argv[1]) as pgn:
	visitor = HistogramVisitor()

	num_games = 0
	while chess.pgn.read_game(pgn, Visitor=lambda: visitor) and num_games < 1000:
	num_games += 1

	code = list(sorted(huffman.codebook(bias(visitor.histogram)).values(), key=len))

	bits = sum(len(code[k]) * visitor.histogram[k] for k in range(256))

	for i in range(0, 256):
	print(" new Symbol(0b{}, {}), // {}".format(code[i], len(code[i]), i))

	print(" //", bits / 8 / num_games, "bytes per game")
	print(" //", bits / 8 / sum(visitor.histogram.values()), "bytes per move")