Last active
March 26, 2017 07:17
-
-
Save everdark/f307ba34696197f9afafd51efdb13d2d to your computer and use it in GitHub Desktop.
mxnet character-level lstm model: toy example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
# kylechung 2017-03-25 | |
# refernece: http://mxnet.io/tutorials/r/charRnnModel.html | |
library(mxnet) | |
library(magrittr) | |
library(data.table) | |
# get the text data | |
download_data <- function(data_dir) { | |
dir.create(data_dir, showWarnings=FALSE) | |
if ( !file.exists(paste0(data_dir, "input.txt")) ) { | |
download.file(url="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt", | |
destfile=paste0(data_dir,"input.txt"), method="wget") | |
} | |
} | |
download_data("./") | |
texts <- readLines("input.txt") | |
chars <- paste(texts, collapse="\n") %>% | |
strsplit("") %>% | |
unlist | |
# map all characters to int | |
all_chars <- unique(chars) %>% { | |
data.table(char=., idx=1L:length(.) - 1L) | |
} %>% setkey(char) | |
# project texts into int matrix | |
slen <- 32 | |
snum <- as.integer(length(chars) / slen) | |
chars_trunc <- chars[1:(snum*slen)] | |
feats <- data.table(char=chars_trunc, ordering=1:length(chars_trunc)) %>% | |
merge(all_chars, by="char") %>% | |
setkey(ordering) %>% | |
.[, label:=shift(idx, type="lead")] | |
X <- array(feats$idx, dim=c(slen, snum)) | |
y <- array(feats$label, dim=c(slen, snum)) | |
# separate training and testing | |
drop_tail <- function(X, batch_size) { | |
nstep <- as.integer(dim(X)[2] / batch_size) | |
X[, 1:(nstep*batch_size)] | |
} | |
train_frac <- .9 | |
batch_size <- 32 | |
train_range <- 1:as.integer(dim(X)[2]*.9) | |
valid_range <- setdiff(1:dim(X)[2], train_range) | |
X_train <- X[,train_range] %>% drop_tail(batch_size) | |
X_valid <- X[,valid_range] %>% drop_tail(batch_size) | |
y_train <- y[,train_range] %>% drop_tail(batch_size) | |
y_valid <- y[,valid_range] %>% drop_tail(batch_size) | |
# train char-level lstm language model | |
start_time <- proc.time() | |
lstm_fit <- mx.lstm( | |
train.data=list(data=X_train, label=y_train), | |
eval.data=list(data=X_valid, label=y_valid), | |
num.lstm.layer=1, | |
seq.len=slen, | |
num.hidden=16, | |
num.embed=16, # the output dim of embedding | |
num.label=nrow(all_chars), | |
batch.size=batch_size, | |
input.size=nrow(all_chars), | |
num.round=10, | |
learning.rate=.1, | |
wd=1e-5, | |
clip_gradient=1, | |
initializer=mx.init.uniform(0.1), | |
ctx=mx.cpu()) | |
end_time <- proc.time() | |
message("Time used: ", (end_time - start_time)["elapsed"]) | |
# make inference | |
cdf <- function(weights) { | |
total <- sum(weights) | |
result <- c() | |
cumsum <- 0 | |
for (w in weights) { | |
cumsum <- cumsum+w | |
result <- c(result, cumsum / total) | |
} | |
return (result) | |
} | |
search.val <- function(cdf, x) { | |
l <- 1 | |
r <- length(cdf) | |
while (l <= r) { | |
m <- as.integer((l+r)/2) | |
if (cdf[m] < x) { | |
l <- m+1 | |
} else { | |
r <- m-1 | |
} | |
} | |
return (l) | |
} | |
choice <- function(weights) { | |
cdf.vals <- cdf(as.array(weights)) | |
x <- runif(1) | |
idx <- search.val(cdf.vals, x) | |
return (idx) | |
} | |
inferer <- mx.lstm.inference( | |
num.lstm.layer=1, | |
input.size=nrow(all_chars), | |
num.hidden=16, | |
num.embed=16, | |
num.label=nrow(all_chars), | |
arg.params=lstm_fit$arg.params, | |
ctx=mx.cpu()) | |
RANDOM <- TRUE | |
start_char <- "a" | |
cur_idx <- all_chars[start_char, idx] | |
len <- 75 | |
out <- start_char | |
for ( i in 1:(len-1) ) { | |
input <- cur_idx | |
ret <- mx.lstm.forward(inferer, input, FALSE) | |
inferer <- ret$model # update the inferer | |
prob <- as.array(ret$prob)[,1] # the prob of every char as the next char | |
# update the idx to next char | |
cur_idx <- | |
if ( RANDOM ) { | |
choice(prob) - 1 # draw from the distribution | |
} else { | |
which.max(prob) - 1 # the most probable -> cause loop in short terms | |
} | |
out <- paste0(out, all_chars[idx == cur_idx, char]) | |
} | |
cat (paste0(out, "\n")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment