Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Created April 12, 2017 02:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdwab/048f4323217bade1168a9b3dff521b22 to your computer and use it in GitHub Desktop.
Save mrdwab/048f4323217bade1168a9b3dff521b22 to your computer and use it in GitHub Desktop.
Output comparison and timings for different approaches posted at Q#43350554 at Stack Overflow.
# https://stackoverflow.com/questions/43350554/r-filling-in-empty-variables
fun1 <- function() {
apply(t(df), 2, function(x) {
conds <- rowSums(cbind(x, dplyr::lag(x), dplyr::lead(x)), na.rm = T)==2
x[conds] <- 1
x
}) %>% t()
}
fun2 <- function() {
df[] <- t(apply(df, 1, function(x) {
st <- range(which(!is.na(x)))
x[st[1]:st[2]] <- x[st[1]]
x}))
df
}
library(data.table)
myFun1 <- function(indf) {
M <- as.matrix(data.table(which(indf == 1, arr.ind = TRUE))[
, list(col = seq.int(min(col), max(col))), row])
indf[M] <- 1
indf
}
myFun2 <- function(indf) {
indf2 <- replace(indf, is.na(indf), 0)
mins <- max.col(indf2, "first")
maxs <- max.col(indf2, "last")
L <- Map(seq.int, mins, maxs)
mat <- cbind(rep(seq_along(L), lengths(L)), unlist(L, use.names = FALSE))
indf[mat] <- 1
indf
}
set.seed(1)
nc <- 10
nr <- 10
df <- data.frame(t(replicate(nr, sample(c(1, 1, rep(NA, nc-2))))))
df
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
# 1 NA NA NA NA 1 NA NA NA NA 1
# 2 NA 1 NA NA NA NA NA NA 1 NA
# 3 NA 1 NA 1 NA NA NA NA NA NA
# 4 NA NA NA 1 NA NA NA 1 NA NA
# 5 NA NA NA NA NA NA 1 1 NA NA
# 6 NA NA NA 1 1 NA NA NA NA NA
# 7 NA NA NA NA NA 1 NA NA 1 NA
# 8 NA NA NA NA NA NA NA 1 NA 1
# 9 NA NA NA NA NA 1 NA 1 NA NA
# 10 NA 1 NA NA NA NA 1 NA NA NA
fun1()
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
# [1,] NA NA NA NA 1 NA NA NA NA 1 *
# [2,] NA 1 NA NA NA NA NA NA 1 NA *
# [3,] NA 1 1 1 NA NA NA NA NA NA
# [4,] NA NA NA 1 NA NA NA 1 NA NA *
# [5,] NA NA NA NA NA NA 1 1 NA NA
# [6,] NA NA NA 1 1 NA NA NA NA NA
# [7,] NA NA NA NA NA 1 NA NA 1 NA *
# [8,] NA NA NA NA NA NA NA 1 1 1
# [9,] NA NA NA NA NA 1 1 1 NA NA
# [10,] NA 1 NA NA NA NA 1 NA NA NA *
fun2()
# X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
# 1 NA NA NA NA 1 1 1 1 1 1
# 2 NA 1 1 1 1 1 1 1 1 NA
# 3 NA 1 1 1 NA NA NA NA NA NA
# 4 NA NA NA 1 1 1 1 1 NA NA
# 5 NA NA NA NA NA NA 1 1 NA NA
# 6 NA NA NA 1 1 NA NA NA NA NA
# 7 NA NA NA NA NA 1 1 1 1 NA
# 8 NA NA NA NA NA NA NA 1 1 1
# 9 NA NA NA NA NA 1 1 1 NA NA
# 10 NA 1 1 1 1 1 1 NA NA NA
identical(fun2(), myFun1(df))
# [1] TRUE
identical(fun2(), myFun2(df))
# [1] TRUE
library(microbenchmark)
microbenchmark(myFun1(df), myFun2(df), fun2()) # 10 x 10 data.frame
# Unit: microseconds
# expr min lq mean median uq max neval
# myFun1(df) 1078.478 1126.6090 1249.6862 1196.8295 1286.4845 4722.194 100
# myFun2(df) 678.980 720.0460 803.3389 771.8900 818.0290 1429.200 100
# fun2() 388.371 422.4315 453.3847 441.5105 462.4375 807.734 100
set.seed(1)
nc <- 100
nr <- 1000
df <- data.frame(t(replicate(nr, sample(c(1, 1, rep(NA, nc-2))))))
microbenchmark(myFun1(df), myFun2(df), fun2()) # 1000 x 100 data.frame
# Unit: milliseconds
# expr min lq mean median uq max neval
# myFun1(df) 9.74110 10.21972 11.39599 10.41556 11.04388 17.04424 100
# myFun2(df) 11.40730 12.02916 15.36465 12.40388 15.29860 184.79787 100
# fun2() 19.24917 21.05500 23.67406 22.17829 26.79834 39.69459 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment