Created
April 12, 2017 02:43
-
-
Save mrdwab/048f4323217bade1168a9b3dff521b22 to your computer and use it in GitHub Desktop.
Output comparison and timings for different approaches posted at Q#43350554 at Stack Overflow.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://stackoverflow.com/questions/43350554/r-filling-in-empty-variables | |
fun1 <- function() { | |
apply(t(df), 2, function(x) { | |
conds <- rowSums(cbind(x, dplyr::lag(x), dplyr::lead(x)), na.rm = T)==2 | |
x[conds] <- 1 | |
x | |
}) %>% t() | |
} | |
fun2 <- function() { | |
df[] <- t(apply(df, 1, function(x) { | |
st <- range(which(!is.na(x))) | |
x[st[1]:st[2]] <- x[st[1]] | |
x})) | |
df | |
} | |
library(data.table) | |
myFun1 <- function(indf) { | |
M <- as.matrix(data.table(which(indf == 1, arr.ind = TRUE))[ | |
, list(col = seq.int(min(col), max(col))), row]) | |
indf[M] <- 1 | |
indf | |
} | |
myFun2 <- function(indf) { | |
indf2 <- replace(indf, is.na(indf), 0) | |
mins <- max.col(indf2, "first") | |
maxs <- max.col(indf2, "last") | |
L <- Map(seq.int, mins, maxs) | |
mat <- cbind(rep(seq_along(L), lengths(L)), unlist(L, use.names = FALSE)) | |
indf[mat] <- 1 | |
indf | |
} | |
set.seed(1) | |
nc <- 10 | |
nr <- 10 | |
df <- data.frame(t(replicate(nr, sample(c(1, 1, rep(NA, nc-2)))))) | |
df | |
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 | |
# 1 NA NA NA NA 1 NA NA NA NA 1 | |
# 2 NA 1 NA NA NA NA NA NA 1 NA | |
# 3 NA 1 NA 1 NA NA NA NA NA NA | |
# 4 NA NA NA 1 NA NA NA 1 NA NA | |
# 5 NA NA NA NA NA NA 1 1 NA NA | |
# 6 NA NA NA 1 1 NA NA NA NA NA | |
# 7 NA NA NA NA NA 1 NA NA 1 NA | |
# 8 NA NA NA NA NA NA NA 1 NA 1 | |
# 9 NA NA NA NA NA 1 NA 1 NA NA | |
# 10 NA 1 NA NA NA NA 1 NA NA NA | |
fun1() | |
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 | |
# [1,] NA NA NA NA 1 NA NA NA NA 1 * | |
# [2,] NA 1 NA NA NA NA NA NA 1 NA * | |
# [3,] NA 1 1 1 NA NA NA NA NA NA | |
# [4,] NA NA NA 1 NA NA NA 1 NA NA * | |
# [5,] NA NA NA NA NA NA 1 1 NA NA | |
# [6,] NA NA NA 1 1 NA NA NA NA NA | |
# [7,] NA NA NA NA NA 1 NA NA 1 NA * | |
# [8,] NA NA NA NA NA NA NA 1 1 1 | |
# [9,] NA NA NA NA NA 1 1 1 NA NA | |
# [10,] NA 1 NA NA NA NA 1 NA NA NA * | |
fun2() | |
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 | |
# 1 NA NA NA NA 1 1 1 1 1 1 | |
# 2 NA 1 1 1 1 1 1 1 1 NA | |
# 3 NA 1 1 1 NA NA NA NA NA NA | |
# 4 NA NA NA 1 1 1 1 1 NA NA | |
# 5 NA NA NA NA NA NA 1 1 NA NA | |
# 6 NA NA NA 1 1 NA NA NA NA NA | |
# 7 NA NA NA NA NA 1 1 1 1 NA | |
# 8 NA NA NA NA NA NA NA 1 1 1 | |
# 9 NA NA NA NA NA 1 1 1 NA NA | |
# 10 NA 1 1 1 1 1 1 NA NA NA | |
identical(fun2(), myFun1(df)) | |
# [1] TRUE | |
identical(fun2(), myFun2(df)) | |
# [1] TRUE | |
library(microbenchmark) | |
microbenchmark(myFun1(df), myFun2(df), fun2()) # 10 x 10 data.frame | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# myFun1(df) 1078.478 1126.6090 1249.6862 1196.8295 1286.4845 4722.194 100 | |
# myFun2(df) 678.980 720.0460 803.3389 771.8900 818.0290 1429.200 100 | |
# fun2() 388.371 422.4315 453.3847 441.5105 462.4375 807.734 100 | |
set.seed(1) | |
nc <- 100 | |
nr <- 1000 | |
df <- data.frame(t(replicate(nr, sample(c(1, 1, rep(NA, nc-2)))))) | |
microbenchmark(myFun1(df), myFun2(df), fun2()) # 1000 x 100 data.frame | |
# Unit: milliseconds | |
# expr min lq mean median uq max neval | |
# myFun1(df) 9.74110 10.21972 11.39599 10.41556 11.04388 17.04424 100 | |
# myFun2(df) 11.40730 12.02916 15.36465 12.40388 15.29860 184.79787 100 | |
# fun2() 19.24917 21.05500 23.67406 22.17829 26.79834 39.69459 100 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment