Skip to content

Instantly share code, notes, and snippets.

@rasmusab
Created May 28, 2015 22:32
Show Gist options
  • Save rasmusab/fb98cced046d4c675d74 to your computer and use it in GitHub Desktop.
Save rasmusab/fb98cced046d4c675d74 to your computer and use it in GitHub Desktop.
Takes a json file describing chess games and produces a matrix with one row per turn showing how many pieces are left, one column per piece.
# Takes a json file describing chess games and produces a matrix with one row
# per turn showing how many pieces are left, one column per piece.
### Don't run this in RStudio: it will take up about twice the RAM,
### because R will make copies of objects instead of using references.
library(jsonlite)
library(stringi)
# Path to your json file (one JSON-encoded game per line) as produced by this
# script: https://gist.github.com/rasmusab/07f1823cb4bd0bc7352d
fname <- "milionbase_4000.json"

# First pass: stream the file once and record how many positions (FEN
# strings) each game contains, so the result matrix can be preallocated.
#
# The games are counted by reading the file itself instead of shelling out to
# `wc -l`. That system call is unavailable on Windows, pads its output with
# leading spaces on some platforms (so splitting on " " and taking the first
# field yields NA), and does not count a final line that lacks a trailing
# newline.
chunk_size <- 10000L
n_moves_chunks <- list()
nbr_of_games <- 0
games_file <- file(fname, "r")
tryCatch({
  repeat {
    chunk <- readLines(games_file, n = chunk_size, encoding = "latin1",
                       warn = FALSE)
    if (length(chunk) == 0L) {
      break
    }
    # One integer per game: the number of FEN positions in that game.
    n_moves_chunks[[length(n_moves_chunks) + 1L]] <- vapply(
      chunk,
      function(games_row) length(fromJSON(games_row)[["fen"]]),
      integer(1),
      USE.NAMES = FALSE
    )
    nbr_of_games <- nbr_of_games + length(chunk)
    # Progress report every 10000 games (only full chunks hit a multiple).
    if ((nbr_of_games %% 10000) == 0) print(nbr_of_games)
  }
}, finally = close(games_file))  # release the connection even on a parse error
# as.integer() turns the NULL produced by an empty file into integer(0).
n_moves <- as.integer(unlist(n_moves_chunks))
# Second pass: fill a preallocated integer matrix with one row per position
# (turn) and 16 columns: game id, both Elo ratings, the result, the active
# player, the fullmove number and the count of each of the 10 piece types.
games <- matrix(0L, nrow = sum(n_moves), ncol = 16)

# Loop-invariant lookup tables, hoisted out of the per-game loop.
# Result is coded from white's point of view: win = 1, loss = -1, draw = 0.
result_codes <- c("1-0" = 1, "0-1" = -1, "1/2-1/2" = 0)
piece_symbols <- c("P", "R", "N", "B", "Q", "p", "r", "n", "b", "q")

games_file <- file(fname, "r")
current_n_moves <- 0
tryCatch({
  # seq_len() iterates zero times for an empty file, unlike 1:0.
  for (game_id in seq_len(nbr_of_games)) {
    games_row <- readLines(games_file, 1, encoding = "latin1")
    game <- fromJSON(games_row)

    # A game without any recorded position contributes no rows. Without this
    # guard the row range below would run backwards, (k + 1):k, and the FEN
    # split would not yield the expected six columns.
    if (length(game[["fen"]]) > 0) {
      # Unknown result strings map to NA; so does a missing Result field,
      # which would otherwise produce a zero-length vector that cbind()
      # silently drops, leaving the game matrix one column short.
      result_key <- game[["Result"]]
      if (is.null(result_key)) {
        result <- NA
      } else {
        result <- unname(result_codes[result_key])
      }

      if (is.null(game[["BlackElo"]])) {
        black_elo <- NA
      } else {
        black_elo <- as.numeric(game[["BlackElo"]])
      }
      if (is.null(game[["WhiteElo"]])) {
        white_elo <- NA
      } else {
        white_elo <- as.numeric(game[["WhiteElo"]])
      }

      # Split each FEN string on spaces; the code below reads field 1 (piece
      # placement), field 2 (side to move) and field 6 (fullmove number).
      fen <- stri_split_fixed(game[["fen"]], " ", simplify = TRUE)
      active_player <- (fen[, 2] == "b") + 1  # 1 = white to move, 2 = black
      fullmoves <- as.numeric(fen[, 6])

      # For every position, count the occurrences of each piece letter in the
      # placement field. vapply() returns a 10 x n matrix, hence the t().
      piece_count <- t(
        vapply(fen[, 1], stri_count_fixed, FUN.VALUE = numeric(10),
               USE.NAMES = FALSE, pattern = piece_symbols)
      )

      # The scalar columns are recycled by cbind() across the game's rows.
      game_mat <- cbind(game_id, white_elo, black_elo, result, active_player,
                        fullmoves, piece_count)
      row.names(game_mat) <- NULL
      storage.mode(game_mat) <- "integer"

      rows <- current_n_moves + seq_len(nrow(game_mat))
      games[rows, ] <- game_mat
      current_n_moves <- current_n_moves + nrow(game_mat)
    }

    # Progress report every 1000 games.
    if ((game_id %% 1000) == 0) {
      print(game_id)
    }
  }
}, finally = close(games_file))  # release the connection even on error
# Label the 16 columns: six per-position metadata fields followed by the piece
# counts for white and then black, in the order the pieces were counted
# (pawn, rook, knight, bishop, queen).
colnames(games) <- local({
  meta_cols <- c("game_id", "white_elo", "black_elo", "result",
                 "active_player", "fullmoves")
  piece_names <- c("pawn", "rook", "knight", "bishop", "queen")
  c(meta_cols, paste0("white_", piece_names), paste0("black_", piece_names))
})

# Write the finished matrix to disk in R's serialized format.
saveRDS(games, file = "milionbase_matrix.rds")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment