Skip to content

Instantly share code, notes, and snippets.

@dylanmei
Created September 9, 2019 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dylanmei/77b9215a219047337089ebb16fd4ed31 to your computer and use it in GitHub Desktop.
Save dylanmei/77b9215a219047337089ebb16fd4ed31 to your computer and use it in GitHub Desktop.
Parse PGN chess database files
import java.io.File
import java.io.Reader
import java.io.StringReader
import java.io.BufferedReader
import kotlin.sequences.*
import kotlin.collections.*
/**
 * One chess game read from a PGN database.
 *
 * The string properties hold the values of the PGN tags of the same names
 * (`Event`, `Site`, `Round`, `Date`, `White`, `Black`) as plain text; no
 * further interpretation (e.g. date parsing) is applied to them.
 *
 * @property event value of the `Event` tag
 * @property site value of the `Site` tag
 * @property round value of the `Round` tag
 * @property date value of the `Date` tag
 * @property white value of the `White` tag
 * @property black value of the `Black` tag
 * @property moves the movetext entries, in the order they appear in the game
 */
data class Match(
    val event: String,
    val site: String,
    val round: String,
    val date: String,
    val white: String,
    val black: String,
    val moves: List<Move>
)
/**
 * One numbered entry of a game's movetext.
 *
 * @property number 1-based position of the entry within the game
 * @property color side label for the entry; `PgnMatchParser` writes either
 *   `"white"` or `"black"`
 * @property text the raw movetext belonging to the entry. `PgnMatchParser`
 *   stores everything found after a move-number marker such as `12.`, so the
 *   text can contain more than one half-move (e.g. `d4 Nf6`).
 */
data class Move(
    val number: Int,
    val color: String,
    val text: String
)
/**
 * Parses the text of a single PGN game (tag lines followed by movetext)
 * into a [Match].
 */
class PgnMatchParser {
    // A move-number marker such as "12. " — digits, a dot, then whitespace.
    private val moveRegex = """\d+\.\s+""".toRegex()

    /**
     * Builds a [Match] from the tag lines and movetext found in [text].
     *
     * @param text the PGN text of one game
     * @return the parsed game
     * @throws NoSuchElementException if any of the `Event`, `Site`, `Round`,
     *   `Date`, `White` or `Black` tags is missing
     */
    fun parse(text: String): Match {
        val attributes = parseAttributes(text)
        return Match(
            event = attributes.getValue("Event"),
            site = attributes.getValue("Site"),
            round = attributes.getValue("Round"),
            date = attributes.getValue("Date"),
            white = attributes.getValue("White"),
            black = attributes.getValue("Black"),
            moves = parseMoves(text)
        )
    }

    /**
     * Collects every `[Name "value"]` line of [text] into a name-to-value map.
     *
     * Only lines that start with `[` and end with `]` are considered. The
     * value has surrounding whitespace and double quotes removed. A tag line
     * without a value (no space after the name) is skipped instead of
     * aborting the parse. When a name occurs more than once the last
     * occurrence wins.
     *
     * @param text the PGN text of one game
     * @return tag names mapped to their unquoted values
     */
    fun parseAttributes(text: String): Map<String, String> {
        val attributes = mutableMapOf<String, String>()
        for (line in text.lines()) {
            if (!isTagLine(line)) continue
            val parts = line.trim('[', ']').split(" ", limit = 2)
            // "[Name]" has no value part; destructuring it would throw.
            if (parts.size < 2) continue
            // Trim whitespace first so extra spaces between name and value
            // do not shield the opening quote from being stripped.
            attributes[parts[0]] = parts[1].trim().trim('"')
        }
        return attributes
    }

    /**
     * Extracts the movetext entries of [text].
     *
     * The movetext starts at the first line beginning with `1.` and runs
     * until the next blank line or tag line (or the end of the text), so
     * movetext wrapped over several lines is read in full. The lines are
     * joined with a single space and then split at every move-number marker.
     *
     * @param text the PGN text of one game
     * @return one [Move] per move-number marker, numbered from 1; empty when
     *   no line starts with `1.`
     */
    fun parseMoves(text: String): List<Move> {
        val movetext = text.lines()
            .dropWhile { !it.startsWith("1.") }
            .takeWhile { it.isNotBlank() && !isTagLine(it) }
            .joinToString(" ")

        return movetext.split(moveRegex)
            .filter { it.isNotEmpty() }
            .mapIndexed { i, move ->
                Move(
                    number = i + 1,
                    // NOTE(review): the colour alternates per numbered entry,
                    // yet each entry's text is everything after one move
                    // number (typically both players' moves) — confirm this
                    // labelling is what consumers expect.
                    color = if (i % 2 == 0) "white" else "black",
                    text = move.trim()
                )
            }
    }

    /** True when [line] has the `[...]` shape of a PGN tag line. */
    private fun isTagLine(line: String): Boolean =
        line.startsWith("[") && line.endsWith("]")
}
/**
 * Renders a [Match] as PGN text: six tag lines followed by the movetext,
 * wrapped so that no line exceeds [MAX_LINE_LEN] characters.
 */
class PgnMatchFormatter {
    val MAX_LINE_LEN = 74 // not including newline char

    /**
     * Produces the complete PGN text for [match].
     *
     * @return the tag lines, a newline, then the wrapped movetext
     */
    fun format(match: Match): String {
        val header = attributes(match)
        val movetext = moves(match)
        return header + "\n" + movetext
    }

    /**
     * Renders the `Event`, `Site`, `Round`, `Date`, `White` and `Black` tags
     * of [match], one per line and in that order, without a trailing newline.
     */
    // The template lines stay at column 0 so that trimIndent() has no common
    // indent to remove and only drops the blank first and last lines.
    fun attributes(match: Match): String = """
[Event "${match.event}"]
[Site "${match.site}"]
[Round "${match.round}"]
[Date "${match.date}"]
[White "${match.white}"]
[Black "${match.black}"]
""".trimIndent()

    /**
     * Renders the moves of [match] as `1. text 2. text ...`.
     *
     * Each entry is numbered by its position in the list (starting at 1).
     * Every piece written — the separating space, the move number, and the
     * move text — goes through the same wrapping rule: if appending it would
     * push the current line past [MAX_LINE_LEN], a newline is written first.
     * Because separator spaces are wrapped like any other piece, a wrapped
     * line may start with a space, or the line before it may end with one.
     */
    fun moves(match: Match): String {
        var column = 0
        return buildString {
            // Writes one piece, breaking the line first when it would not fit.
            fun emit(piece: String) {
                if (column + piece.length > MAX_LINE_LEN) {
                    append("\n")
                    column = 0
                }
                append(piece)
                column += piece.length
            }

            for ((index, move) in match.moves.withIndex()) {
                // Entries after the first are separated from their
                // predecessor by a single space.
                if (index > 0) emit(" ")
                emit("${index + 1}.")
                emit(" ")
                emit(move.text)
            }
        }
    }
}
/**
 * Splits a PGN database containing any number of games into one string per
 * game.
 *
 * Each emitted string consists of the game's tag lines, each terminated by
 * `\n`, followed by the whole movetext joined onto a single line.
 */
class PgnFileSplitter {
    // A tag line: "[" ... "]" spanning the whole line.
    private val attrRegex = """^\[.+\]$""".toRegex()
    // Any other non-empty line is treated as movetext.
    private val moveRegex = """^\s?.+""".toRegex()
    // An empty line; it terminates a game once movetext has been seen.
    private val termRegex = """^$""".toRegex()

    /** Splits PGN text held in memory; see [split] taking a [BufferedReader]. */
    fun split(text: String): Sequence<String> =
        split(BufferedReader(StringReader(text)))

    /**
     * Splits the PGN database stored in [file].
     *
     * The file is opened when iteration of the returned sequence starts and
     * is closed once the sequence has been consumed to the end (or reading
     * fails). A consumer that stops early leaves the file open.
     */
    fun split(file: File): Sequence<String> = sequence {
        file.bufferedReader().use { reader -> yieldAll(split(reader)) }
    }

    /**
     * Lazily reads [reader] line by line and yields one string per game.
     *
     * - A tag line is appended to the current game followed by `\n`.
     * - Any other non-empty line is movetext and is appended to the current
     *   game on the same line; a single space is inserted when neither side
     *   of the join already has whitespace, so moves wrapped across lines do
     *   not run together.
     * - An empty line after movetext ends the game. Empty lines seen before
     *   any movetext are ignored.
     * - Whatever has been collected when the input ends is yielded as a final
     *   game.
     *
     * Movetext that appears before any tag line starts a game of its own
     * instead of failing. The reader is not closed by this function.
     */
    fun split(reader: BufferedReader): Sequence<String> = sequence {
        val record = StringBuilder()
        var waitingForTermination = false

        for (line in reader.lineSequence()) {
            when {
                line.matches(attrRegex) -> record.append(line).append('\n')
                line.matches(moveRegex) -> {
                    waitingForTermination = true
                    if (needsSeparator(record, line)) {
                        record.append(' ')
                    }
                    record.append(line)
                }
                waitingForTermination && line.matches(termRegex) -> {
                    yield(record.toString())
                    record.setLength(0)
                    waitingForTermination = false
                }
            }
        }

        if (record.isNotEmpty()) {
            yield(record.toString())
        }
    }

    /**
     * True when joining [line] onto [record] would fuse two tokens, i.e. the
     * record is non-empty and there is no whitespace on either side of the
     * join.
     */
    private fun needsSeparator(record: CharSequence, line: String): Boolean =
        record.isNotEmpty() &&
            line.isNotEmpty() &&
            !record.last().isWhitespace() &&
            !line.first().isWhitespace()
}
/**
 * Demo entry point: splits the embedded sample PGN text into games, parses
 * each one, and prints it re-formatted, followed by an empty line.
 */
fun main(args: Array<String>) {
    // Alternative input: a PGN database on disk, e.g. File("matches.pgn").
    val samplePgn = """
[Event "National Open 2012"]
[Site "Las Vegas USA"]
[Date "2012.06.15"]
[Round "1"]
[White "Karagianis, P. (wh)"]
[Black "Kekelidze, M. (bl)"]
[Result "0-1"]
[WhiteElo "2[1]83"]
[BlackElo "2494"]
[ECO "E10"]
1. d4 Nf6 2. c4 e6 3. Nf3 c5 4. d5 b5 5. Bf4 b4 6. Qc2 d6 7. e4 e5 8. Bd2
a5 9. a3 Na6 10. g3 g6 11. Bg2 Bg7 12. O-O O-O 13. Nh4 Ne8 14. Qc1 f5 15.
exf5 gxf5 16. Bg5 Qb6 17. Bh6 Ra7 18. Qg5 Rf6 19. Bxg7 Rxg7 20. Qh5 Rff7
21. Bh3 Nf6 22. Qe2 Ng4 23. f3 Nh6 24. Nd2 a4 25. Ne4 Qd8 26. Qd2 Qxh4 27.
Nxd6 0-1
"""
    val games = PgnFileSplitter().split(samplePgn)
    val parser = PgnMatchParser()
    val formatter = PgnMatchFormatter()

    // The sequence is lazy: each game is split, parsed and printed in turn.
    for (game in games) {
        val match = parser.parse(game)
        println(formatter.format(match))
        println()
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment