Skip to content

Instantly share code, notes, and snippets.

Created February 8, 2017 15:37
Show Gist options
  • Save kidig/4cc9a72b1117d8d6dbd35adf9a8a995a to your computer and use it in GitHub Desktop.
Save kidig/4cc9a72b1117d8d6dbd35adf9a8a995a to your computer and use it in GitHub Desktop.
PGN chess format parser
# Demonstration of the pyparsing module, implementing a PGN parser.
# Originally by Alberto Santini
import sys
from pyparsing import Combine, Forward, Group, Literal, oneOf, OneOrMore, Optional, Suppress, ZeroOrMore, Word
from pyparsing import ParseException
from pyparsing import alphanums, nums, quotedString, removeQuotes
# Every code point from U+0000 through sys.maxunicode (inclusive) that
# str.isspace() does not classify as whitespace.  range() excludes its stop
# value, hence the "+ 1" so the final code point is not silently dropped.
unicodePrintables = ''.join(
    ch for ch in map(chr, range(sys.maxunicode + 1)) if not ch.isspace()
)
# Same alphabet with the curly braces removed; "{" and "}" delimit comments in
# the PGN grammar, so they must not be part of the words inside a comment.
unicodePrintablesNoBraces = unicodePrintables.translate(str.maketrans('', '', '{}'))
class Node:
    """Thin wrapper that keeps hold of the tokens it was constructed with.

    Args:
        tokens: Stored unchanged on ``self.tokens``.  ``__repr__`` calls its
            ``asList()`` method, so the object must provide one.
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def __repr__(self):
        """Return ``"<ClassName>: (<tokens.asList()>)"``.

        The concrete class name is used, so subclasses identify themselves
        without overriding this method.
        """
        return "{}: ({})".format(self.__class__.__name__, self.tokens.asList())
class TagNode(Node):
    """Node subclass that adds nothing of its own; only the class name differs.

    NOTE(review): the name suggests it is meant to wrap tag tokens -- confirm
    against the code that instantiates it.
    """


class MoveNode(Node):
    """Node subclass that adds nothing of its own; only the class name differs.

    NOTE(review): the name suggests it is meant to wrap move tokens -- confirm
    against the code that instantiates it.
    """


class CommentNode(Node):
    """Node subclass that adds nothing of its own; only the class name differs.

    NOTE(review): the name suggests it is meant to wrap comment tokens --
    confirm against the code that instantiates it.
    """
# define pgn grammar

# --- tag pairs, e.g. [Event "ICC 5 0 u"] ------------------------------------
# The square brackets are suppressed and the parse action turns each pair into
# a {'name': ..., 'value': ...} dict.  The value is the quotedString match as
# is; no removeQuotes action is attached to it here.
tag_value = Combine(quotedString)
tag = Suppress("[") + Group(Word(alphanums) + tag_value) + Suppress("]")
tag.setParseAction(lambda toks: {'name': toks[0][0], 'value': toks[0][1]})

# --- brace comments, e.g. {Black wins} --------------------------------------
# The braces are suppressed and the words between them are combined into one
# string using a single space as the join string.
# NOTE(review): comment_text is declared as a Forward and only receives its
# definition after Combine has wrapped it.  The ordering looks deliberate
# (presumably it affects how whitespace inside a comment is handled) --
# confirm before reordering these three statements.
comment_text = Forward()
comment = Suppress("{") + Combine(comment_text, joinString=" ") + Suppress("}")
comment_text <<= OneOrMore(Word(unicodePrintablesNoBraces + " "))

# --- building blocks of a move ----------------------------------------------
dot = Literal(".")
piece = oneOf("K Q B N R")
file_coord = oneOf("a b c d e f g h")
rank_coord = oneOf("1 2 3 4 5 6 7 8")
capture = oneOf("x :")
promote = Literal("=")
# Queenside is listed before kingside wherever both are alternatives, because
# "O-O" is a prefix of "O-O-O".
castle_queenside = Literal("O-O-O") | Literal("0-0-0") | Literal("o-o-o")
castle_kingside = Literal("O-O") | Literal("0-0") | Literal("o-o")
move_number = Optional(comment) + Word(nums) + dot

m1 = file_coord + rank_coord  # pawn move e.g. d4
m2 = file_coord + capture + file_coord + rank_coord  # pawn capture move e.g. dxe5
# pawn promotion e.g. e8=Q, e1=Q (black promotes on rank 1) or, with a
# capture, dxe8=Q
m3 = file_coord + Optional(capture + file_coord) + oneOf("1 8") + promote + piece
m4 = piece + file_coord + rank_coord  # piece move e.g. Be6
m5 = piece + file_coord + file_coord + rank_coord  # piece move e.g. Nbd2
m6 = piece + rank_coord + file_coord + rank_coord  # piece move e.g. R4a7
m7 = piece + capture + file_coord + rank_coord  # piece capture move e.g. Bxh7
m8 = castle_queenside | castle_kingside  # castling e.g. o-o

# --- move suffixes ----------------------------------------------------------
check = oneOf("+ ++")
mate = Literal("#")
annotation = Word("!?", max=2)
# Numeric annotation glyph such as " $2"; the literal deliberately starts with
# the space that separates it from the move text.
nag = " $" + Word(nums)
decoration = check | mate | annotation | nag

# --- half-moves, moves and variations ---------------------------------------
variant = Forward()
# m3 is tried first so that a promotion is not cut short by the plain pawn
# patterns m1/m2 matching its leading characters.
half_move = Combine((m3 | m1 | m2 | m4 | m5 | m6 | m7 | m8) + Optional(decoration)) \
    + Optional(comment) + Optional(variant)

# grouping the plies (half-moves) for each move: useful to group annotations, variants...
# suggested by Paul McGuire :)
move = Group(Suppress(move_number) + half_move + Optional(half_move))
variant <<= Group(Suppress("(") + OneOrMore(move) + Suppress(")"))

# --- whole game -------------------------------------------------------------
game_terminator = oneOf("1-0 0-1 1/2-1/2 *")
tags = Group(ZeroOrMore(tag))
grammar = tags + Group(Optional(comment) + ZeroOrMore(move)) + Suppress(game_terminator)

# tag.addParseAction(TagNode)
# Replace the group of tag dicts with a single {'headers': [...]} dict.
tags.setParseAction(lambda toks: {'headers': toks[0].asList()})
def parse_pgn(text, bnf=grammar):
    """Parse PGN text with a pyparsing grammar.

    Args:
        text: The PGN source to parse.
        bnf: Parser element whose ``parseString`` is called; defaults to the
            module-level ``grammar``.

    Returns:
        The result of ``bnf.parseString(text)``, or ``None`` if a
        ``ParseException`` was raised.  In that case the error is reported on
        stdout instead of being propagated.
    """
    try:
        return bnf.parseString(text)
    except ParseException as err:
        # Show the offending line, a caret under the failing column, and the
        # parser's own message; the caret alone gives no usable context.
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
        return None
if __name__ == "__main__":
    # Sample game used as the input string.  Its lines start at column 0
    # because they are the content of the triple-quoted literal, not code.
    pgn = """
[Event "ICC 5 0 u"]
[Site "Internet Chess Club"]
[Date "2004.01.25"]
[Round "-"]
[White "guest920"]
[Black "IceBox"]
[Result "0-1"]
[ICCResult "White checkmated"]
[BlackElo "1498"]
[Opening "French defense"]
[ECO "C00"]
[NIC "FR.01"]
[Time "04:44:56"]
[TimeControl "300+0"]
{start йцу,
Comment} 1. e4 e6 2. Nf3 d5 $2 3. exd5 (3. e5 g6 4. h4) exd5 4. Qe2+ Qe7 5. Qxe7+ Bxe7 6. d3 Nf6 7. Be3
Bg4 8. Nbd2 c5 9. h3 Be6 10. O-O-O Nc6 11. g4 Bd6 12. g5 Nd7 13. Rg1 d4 14.
g6 fxg6 15. Bg5 Rf8 16. a3 Bd5 17. Re1+ Nde5 18. Nxe5 Nxe5 19. Bf4 Rf5 20.
Bxe5 Rxe5 21. Rg5 Rxe1# {Black wins} 0-1
"""
    # parse input string
    tokens = parse_pgn(pgn, grammar)
    # Debug output, disabled by default.  parse_pgn yields None when parsing
    # fails, so check for that before enabling either line.
    # print("\n\n".join([str(t) for t in tokens]))
    # print(repr(tokens[0]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment