mxnet character-level lstm model: toy example
#!/usr/bin/env Rscript
# kylechung 2017-03-25
# reference: http://mxnet.io/tutorials/r/charRnnModel.html
library(mxnet)
library(magrittr)
library(data.table)
# get the text data: the tiny Shakespeare corpus
download_data <- function(data_dir) {
  dir.create(data_dir, showWarnings=FALSE)
  # note: data_dir is assumed to end with a slash, since paths are built with paste0
  if ( !file.exists(paste0(data_dir, "input.txt")) ) {
    download.file(url="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/tinyshakespeare/input.txt",
                  destfile=paste0(data_dir, "input.txt"), method="auto")  # "auto" works without wget installed
  }
}
download_data("./")
texts <- readLines("input.txt")
chars <- paste(texts, collapse="\n") %>%
  strsplit("") %>%
  unlist
# map all characters to integer ids (0-based, as mxnet expects)
all_chars <- unique(chars) %>% {
  data.table(char=., idx=1L:length(.) - 1L)
} %>% setkey(char)
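# illustrative sanity check (not in the original gist): the table is keyed by
# char, so indexing with a character returns its row and 0-based integer id
print(nrow(all_chars))  # vocabulary size (around 65 distinct chars for this corpus)
print(all_chars["a"])   # the row mapping "a" to its id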
# project the text into an integer matrix: one column per length-slen sequence
slen <- 32
snum <- as.integer(length(chars) / slen)
chars_trunc <- chars[1:(snum*slen)]
feats <- data.table(char=chars_trunc, ordering=1:length(chars_trunc)) %>%
  merge(all_chars, by="char") %>%
  setkey(ordering) %>%
  .[, label:=shift(idx, type="lead")]  # label = id of the next char; the final label is NA
X <- array(feats$idx, dim=c(slen, snum))
y <- array(feats$label, dim=c(slen, snum))
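# illustrative check (not in the original gist): within a column the label is
# the input shifted ahead by one character
stopifnot(all(X[2:slen, 1] == y[1:(slen-1), 1]))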
# separate training and testing
drop_tail <- function(X, batch_size) {
  # truncate columns so the sequence count is a multiple of batch_size
  nstep <- as.integer(dim(X)[2] / batch_size)
  X[, 1:(nstep*batch_size)]
}
train_frac <- .9
batch_size <- 32
train_range <- 1:as.integer(dim(X)[2]*train_frac)
valid_range <- setdiff(1:dim(X)[2], train_range)
X_train <- X[, train_range] %>% drop_tail(batch_size)
X_valid <- X[, valid_range] %>% drop_tail(batch_size)
y_train <- y[, train_range] %>% drop_tail(batch_size)
y_valid <- y[, valid_range] %>% drop_tail(batch_size)
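# illustrative check (not in the original gist): both splits now hold a
# multiple of batch_size sequences
stopifnot(dim(X_train)[2] %% batch_size == 0,
          dim(X_valid)[2] %% batch_size == 0)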
# train the char-level lstm language model
start_time <- proc.time()
lstm_fit <- mx.lstm(
  train.data=list(data=X_train, label=y_train),
  eval.data=list(data=X_valid, label=y_valid),
  num.lstm.layer=1,
  seq.len=slen,
  num.hidden=16,
  num.embed=16,                # the output dim of the embedding layer
  num.label=nrow(all_chars),
  batch.size=batch_size,
  input.size=nrow(all_chars),  # vocabulary size
  num.round=10,
  learning.rate=.1,
  wd=1e-5,
  clip_gradient=1,
  initializer=mx.init.uniform(0.1),
  ctx=mx.cpu())
end_time <- proc.time()
message("Time used: ", (end_time - start_time)["elapsed"])
# make inference
cdf <- function(weights) {
  # cumulative distribution over (possibly unnormalized) weights
  cumsum(weights) / sum(weights)
}
search.val <- function(cdf, x) {
  # binary search: the smallest index whose cumulative value is >= x
  l <- 1
  r <- length(cdf)
  while (l <= r) {
    m <- as.integer((l+r)/2)
    if (cdf[m] < x) {
      l <- m+1
    } else {
      r <- m-1
    }
  }
  return(l)
}
choice <- function(weights) {
  # draw one index from the categorical distribution defined by weights
  cdf.vals <- cdf(as.array(weights))
  x <- runif(1)
  idx <- search.val(cdf.vals, x)
  return(idx)
}
inferer <- mx.lstm.inference(
  num.lstm.layer=1,
  input.size=nrow(all_chars),
  num.hidden=16,
  num.embed=16,
  num.label=nrow(all_chars),
  arg.params=lstm_fit$arg.params,
  ctx=mx.cpu())
RANDOM <- TRUE  # sample from the predicted distribution instead of taking the argmax
start_char <- "a"
cur_idx <- all_chars[start_char, idx]
len <- 75
out <- start_char
for ( i in 1:(len-1) ) {
  input <- cur_idx
  ret <- mx.lstm.forward(inferer, input, FALSE)
  inferer <- ret$model  # update the inferer state
  prob <- as.array(ret$prob)[,1]  # the prob of every char as the next char
  # update the idx to the next char
  cur_idx <-
    if ( RANDOM ) {
      choice(prob) - 1  # draw from the distribution (ids are 0-based)
    } else {
      which.max(prob) - 1  # the most probable char; tends to loop over short phrases
    }
  out <- paste0(out, all_chars[idx == cur_idx, char])
}
cat(paste0(out, "\n"))
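# possible extension (sketch, not in the original gist): a temperature knob
# sharpens (temp < 1) or flattens (temp > 1) the distribution before sampling;
# choice() normalizes internally, so raw powers of prob are fine:
#   temper <- function(prob, temp) prob^(1/temp)
#   cur_idx <- choice(temper(prob, .7)) - 1  # hypothetical replacement inside the loop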