Skip to content

Instantly share code, notes, and snippets.

@brodieG
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brodieG/160405262b53b0bb82bf to your computer and use it in GitHub Desktop.
Save brodieG/160405262b53b0bb82bf to your computer and use it in GitHub Desktop.
Benchmarks for Random sampling from 01010 etc.
make_pool <- function(size)
sort(
unlist(
lapply(
seq_len(size),
function(x) do.call(paste0, expand.grid(rep(list(c('0', '1')), x)))
) ) )
system.time(pool4 <- make_pool(4))
system.time(pool8 <- make_pool(8))
system.time(pool16 <- make_pool(16))
pools <- list("4"=pool4, "8"=pool8, "16"=pool16)
# = All Funs with a `size`, `n` interface ======================================
#-------------------------------------------------------------------------------
jbaum <- function(size, n) {
pool <- pools[[as.character(size)]]
chosen <- sample(pool, n)
while(any(rowSums(outer(paste0('^', chosen), chosen, Vectorize(grepl))) > 1)) {
prefixes <- rowSums(outer(paste0('^', chosen), chosen, Vectorize(grepl))) > 1
pool <- pool[rowSums(Vectorize(grepl, 'pattern')(
paste0('^', chosen[!prefixes]), pool)) == 0]
chosen <- c(chosen[!prefixes], sample(pool, sum(prefixes)))
}
chosen
}
#-------------------------------------------------------------------------------
library(data.table)
get_01str <- function(id,max_len){
cuts = cumsum(2^(1:max_len-1))
g = findInterval(id,cuts)
gid = id-cuts[g]+1
data.table(g,gid)[,s:=
do.call(paste,c(list(sep=""),lapply(
seq(g[1]),
function(x) (gid-1) %/% 2^(x-1) %% 2
)))
,by=g]$s
}
get_nixstrs <- function(g,gid,max_len){
cuts = cumsum(2^(1:max_len-1))
gids_child = {
x = gid%%2^sequence(g-1)
ifelse(x,x,2^sequence(g-1))
}
ids_child = gids_child+cuts[sequence(g-1)]-1
ids_parent = if (g==max_len) gid+cuts[g]-1 else {
gids_par = vector(mode="list",max_len)
gids_par[[g]] = gid
for (gg in seq(g,max_len-1))
gids_par[[gg+1]] = c(gids_par[[gg]],gids_par[[gg]]+2^gg)
unlist(mapply(`+`,gids_par,cuts-1))
}
c(ids_child,ids_parent)
}
drawem <- function(n,max_len){
cuts = cumsum(2^(1:max_len-1))
inds_by_g = mapply(seq,cuts,cuts*2)
oklens = (1:max_len)[ n <= 2^max_len*(1-2^(-(1:max_len)))+1 ]
okinds = unlist(inds_by_g[oklens])
mysamp = rep(0,n)
for (i in 1:n){
id = if (length(okinds)==1) okinds else sample(okinds,1)
g = findInterval(id,cuts)
gid = id-cuts[g]+1
nixed = get_nixstrs(g,gid,max_len)
# print(id); print(okinds); print(nixed)
mysamp[i] = id
okinds = setdiff(okinds,nixed)
if (!length(okinds)) break
}
res <- rep("",n)
res[seq.int(i)] <- get_01str(mysamp[seq.int(i)],max_len)
res
}
frank <- function(size, n) drawem(n, size)
#-------------------------------------------------------------------------------
library(stringr)
tree_sample <- function(samples,pool, max_len) {
results <- vector("integer",samples)
# Will be used on a regular basis, compute it in advance
PoolLen <- str_length(pool)
# Make a mask vector based on the length of each entry of the pool
masks <- strtoi(str_pad(str_pad("1",PoolLen,"right","1"),max_len,"right","0"),base=2)
# Make an integer vector from "0" right padded orignal: for max_len=4 and pool entry "1" we get "1000" => 8
# This will allow to find this entry as parent of 10 and 11 which become "1000" and "1100", as integer 8 and 12 respectively
# once bitwise "anded" with the repective mask "1000" the first bit is striclty the same, so it's a parent.
integerPool <- strtoi(str_pad(pool,max_len,"right","0"),base=2)
# Create a vector to filter the available value to sample
ok <- rep(TRUE,length(pool))
#Precompute the result of the bitwise and betwwen our integer pool and the masks
MaskedPool <- bitwAnd(integerPool,masks)
while(samples) {
samp <- sample(pool[ok],1) # Get a sample
results[samples] <- samp # Store it as result
ok[pool == samp] <- FALSE # Remove it from available entries
vsamp <- strtoi(str_pad(samp,max_len,"right","0"),base=2) # Get the integer value of the "0" right padded sample
mlen <- str_length(samp) # Get sample len
#Creation of unitary mask to remove childs of sample
mask <- strtoi(paste0(rep(1:0,c(mlen,max_len-mlen)),collapse=""),base=2)
# Get the result of bitwise And between the integerPool and the sample mask
FilterVec <- bitwAnd(integerPool,mask)
# Get the bitwise and result of the sample and it's mask
Childm <- bitwAnd(vsamp,mask)
ok[FilterVec == Childm] <- FALSE # Remove from available entries the childs of the sample
ok[MaskedPool == bitwAnd(vsamp,masks)] <- FALSE # compare the sample with all the masks to remove parents matching
samples <- samples -1
if(!sum(ok)) break
}
results
}
tensibai <- function(size, n) {
tree_sample(n,pools[[as.character(size)]], size)
}
#-------------------------------------------------------------------------------
library(lpSolve)
sample.lp <- function(pool, max_len) {
pool <- sort(pool)
pml <- max(nchar(pool))
runs <- c(rev(cumsum(2^(seq(pml-1)))), 0)
banned.from <- rep(seq(pool), runs[nchar(pool)])
banned.to <- banned.from + unlist(lapply(runs[nchar(pool)], seq_len))
banned.constr <- matrix(0, nrow=length(banned.from), ncol=length(pool))
banned.constr[cbind(seq(banned.from), banned.from)] <- 1
banned.constr[cbind(seq(banned.to), banned.to)] <- 1
mod <- lp(direction="max",
objective.in=runif(length(pool)),
const.mat=rbind(banned.constr, rep(1, length(pool))),
const.dir=c(rep("<=", length(banned.from)), "=="),
const.rhs=c(rep(1, length(banned.from)), max_len),
all.bin=TRUE)
pool[which(mod$solution == 1)]
}
josilber <- function(size, n) sample.lp(pools[[as.character(size)]], n)
# = Run benchmarks ===================================================
library(functional)
stop("Remember to load the brodie funs")
funs <- c(
jbaum=jbaum, josilber=josilber, frank=frank, tensibai=tensibai,
brodie.b=sample0110b,
brodie=Curry(sample0110, complete.only=TRUE),
brodie.C=Curry(sample01101, complete.only=TRUE),
brodie.str=function(size, n) sample01(pools[[as.character(size)]], n)
)
sizes <- c(4, 8, 16)
ns <- c(10, 50, 100, 256, 1000)
res <- expand.grid(fun=names(funs), size=sizes, n=ns)
res <- subset(
res, (
fun %in% c("frank", "tensibai", "brodie.b", "brodie", "brodie.str", "brodie.cpl", "brodie.C", "brodie.C.cpl") |
size <= 8
) & ! ((fun == "josilber" & n > 10) | (fun == "jbaum" & n > 100))
)
res$val <- NA_integer_
library(microbenchmark)
for(i in 1:nrow(res)) {
cat(sprintf("Running %s size: %d n: %d\n", res[i, 1], res[i, 2], res[i, 3]))
bm <- try(
microbenchmark(funs[[res[i, 1]]](res[i, 2], res[i, 3]), times=3)$time[[2]]
)
val <- if(inherits(bm, "try-error")) NA else bm
res[i, 4] <- val
}
dcast(transform(res, val=format(round(val/1e6), big.mark=",")), size + n ~ fun, value.var="val")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment