Skip to content

Instantly share code, notes, and snippets.

@sgibb
Created March 3, 2019 21:27
Show Gist options
  • Save sgibb/8c8ace205ae566680cd08022b49fc790 to your computer and use it in GitHub Desktop.
Save sgibb/8c8ace205ae566680cd08022b49fc790 to your computer and use it in GitHub Desktop.
simple on-disk vector
setClass("ondiskvec",
slots=list(path="character", n="numeric"),
prototype=list(path=character(), n=numeric())
)
ondiskvec <- function(x, path=tempfile()) {
writeBin(as.double(x), path, size=8L)
new("ondiskvec", path=path, n=length(x))
}
setMethod("length", "ondiskvec", function(x)x@n)
setMethod(f="[",
signature=signature(x="ondiskvec", i="numeric", j="missing"),
definition=function(x, i, j, ..., drop=FALSE) {
if (length(i) == 1L) {
f <- file(x@path, "rb")
on.exit(close(f))
if (i > 1L) {
seek(f, where=(i - 1L) * 8L)
}
readBin(f, "double", n=1L, size=8L)
} else {
# that's stupid but not used often
readBin(x@path, "double", n=x@n, size=8L)[i]
}
})
setMethod(f="[",
signature=signature(x="ondiskvec", i="missing", j="missing"),
definition=function(x, i, j, ..., drop=FALSE) {
readBin(x@path, "double", n=x@n, size=8L)
})
setReplaceMethod(f="[",
signature=signature(x="ondiskvec", i="missing", j="missing"),
definition=function(x, i, j, ..., value) {
if (length(value) != x@n) {
stop("Length of 'value' doesn't match length of 'x'.")
}
writeBin(as.double(value), x@path, size=8L)
x
})
x <- 1:10
odv <- ondiskvec(x)
odv[1]
odv[9]
odv[10]
odv[]
odv[] <- 11:20
odv[]
library("matter")
library("microbenchmark")
x <- sample(1e5)
m <- matter_vec(x)
o <- ondiskvec(x)
microbenchmark(m[], o[])
# Unit: microseconds
# expr min lq mean median uq max neval
# m[] 370.232 502.118 637.9967 625.6630 676.8755 3272.946 100
# o[] 209.726 269.250 398.4346 334.7615 374.1130 3299.462 100
microbenchmark(m[1L], o[1L])
# Unit: microseconds
# expr min lq mean median uq max neval
# m[1L] 43.511 44.9160 87.74990 47.020 48.3285 3927.426 100
# o[1L] 18.862 19.9515 31.38386 22.748 23.8100 884.608 100
microbenchmark(m[length(m)], o[length(o)])
# Unit: microseconds
# expr min lq mean median uq max neval
# m[length(m)] 44.719 47.6770 59.22049 50.5620 55.709 182.632 100
# o[length(o)] 27.994 30.4715 38.05458 34.2625 36.499 165.515 100
i <- sample(1e5, 1e2)
microbenchmark(m[i], o[i])
# Unit: microseconds
# expr min lq mean median uq max neval
# m[i] 199.069 237.216 295.7426 274.1865 330.4935 602.635 100
# o[i] 226.902 265.557 423.4638 301.9010 382.5500 4896.891 100
microbenchmark(m[]<-x, o[]<-x, x[]<-x)
# Unit: microseconds
# expr min lq mean median uq max neval
# m[] <- x 845.461 1082.198 1365.1878 1191.546 1395.302 5635.026 100
# o[] <- x 859.788 1306.372 2660.3586 2019.383 3288.673 22707.037 100
# x[] <- x 331.224 364.996 541.4231 470.652 614.356 2288.032 100
microbenchmark(sum(m), sum(o[]))
# Unit: microseconds
# expr min lq mean median uq max neval
# sum(m) 2799.899 2983.1655 3237.6274 3146.5525 3279.2650 7326.494 100
# sum(o[]) 272.509 306.9125 607.9439 451.4855 569.7225 6629.260 100
f <- function(y){ y[] <- y[] + 1L; y}
microbenchmark(f(m), f(o), f(x))
# Unit: microseconds
# expr min lq mean median uq max neval
# f(m) 1466.246 1743.6230 2113.5229 1993.194 2264.881 4326.144 100
# f(o) 1074.079 1370.5895 2301.5754 1675.156 3423.842 5997.356 100
# f(x) 551.204 622.2855 934.9378 797.703 987.192 6961.754 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment