Skip to content

Instantly share code, notes, and snippets.

@brownag
Created January 19, 2020 02:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brownag/99ad3bb30e960ff281b7d382c84b82b4 to your computer and use it in GitHub Desktop.
Save brownag/99ad3bb30e960ff281b7d382c84b82b4 to your computer and use it in GitHub Desktop.
chunkApply -- new version of profileApply that scales linearly with tens of thousands of profiles
chunkApply <- function(object, FUN,
simplify = TRUE,
frameify = FALSE,
chunk.size = 100,
column.names = NULL,
...) {
if(simplify & frameify) {
message("simplify and frameify are both TRUE -- ignoring simplify argument", .call=FALSE)
simplify <- FALSE
}
# break SPC of size n into chunk.size chunks
n <- length(object)
chunk <- sort(1:n %% max(1, round(n / chunk.size))) + 1
# operate on chunks
res <- do.call('c', lapply(split(1:n, chunk), function(idx) {
l <- lapply(as.list(idx), function(i) do.call(FUN, list(object[i, ], ...)))
if(simplify)
return(unlist(l))
return(l)
}))
# return profile IDs if it makes sense for result
if(length(res) == length(object)) {
id.name <- idname(object)
names(res) <- profile_id(object)
}
# return horizon IDs if it makes sense for result
if(length(res) == nrow(object)) {
id.name <- hzidname(object)
names(res) <- hzID(object)
}
if(frameify) {
if(is.data.frame(res[[1]])) {
res <- do.call('rbind', res)
if(nrow(res) == nrow(object)) {
res <- as.data.frame(cbind(denormalize(object, idname(object)),
hzID(object),
res),
row.names = NULL)
oldnames <- colnames(res)
colnames(res) <- c(idname(object), hzidname(object), oldnames[3:length(oldnames)])
} else if(nrow(res) == length(object)) {
res <- as.data.frame(cbind(denormalize(object,
idname(object)),
res),
row.names = NULL)
oldnames <- colnames(res)
colnames(res) <- c(idname(object), oldnames[2:length(oldnames)])
} else {
res <- as.data.frame(res)
}
if(!is.null(column.names))
colnames(res) <- column.names
} else {
message("first result is not class `data.frame` and frameify is TRUE. defaulting to list output.", .call=FALSE)
}
}
return(res)
}
library(aqp)
foo <- do.call('rbind', lapply(as.list(1:10000), random_profile))
depths(foo) <- id ~ top + bottom
simpleFunction <- function(p) {
hz <- horizons(p)
(hz[,3] - hz[,2])[1]
#res <- data.frame(profile_id(p), hzID(p), thk=(hz[,3] - hz[,2]))
#colnames(res) <- c(idname(p), hzidname(p), 'hz_thickness')
#return(res)
}
c1 <- system.time(goo <- chunkApply(foo[1:100,], simpleFunction, simplify = T))
cp1 <- system.time(chunkApply(foo[1:100,], simpleFunction, parallel=T))
p1 <- system.time(profileApply(foo[1:100,], simpleFunction, simplify = FALSE))
c2 <- system.time(chunkApply(foo[1:1000,], simpleFunction))
cp2 <- system.time(chunkApply(foo[1:1000,], simpleFunction, parallel=T))
p2 <- system.time(profileApply(foo[1:1000,], simpleFunction, simplify = FALSE))
c3 <- system.time(chunkApply(foo[1:10000,], simpleFunction))
cp3 <- system.time(chunkApply(foo[1:10000,], simpleFunction, parallel=T))
c4 <- system.time(chunkApply(foo, simpleFunction))
cp4 <- system.time(chunkApply(foo, simpleFunction, parallel=T))
# too dang long
# p3 <- system.time(profileApply(foo, simpleFunction, simplify = FALSE))
c1
cp1
p1
c2
cp2
p2
c3
cp3
p3
library(aqp)
foo <- do.call('rbind', lapply(as.list(1:100000), random_profile))
depths(foo) <- id ~ top + bottom
simpleFunction <- function(p) data.frame(horizons(p)[2,2:3])
idx <- c(seq(100,900,100), seq(1000,10000,1000), seq(10000, 100000, 10000))
idx.sub <- idx[idx <= 10000]
res <- do.call('rbind', lapply(as.list(idx.sub), function(i) {
system.time(profileApply(foo[1:i,], simpleFunction, simplify=F))
}))
res2 <- do.call('rbind', lapply(as.list(idx.sub), function(i) {
system.time(chunkApply(foo[1:i,], simpleFunction))
}))
plot(res[,3]~idx.sub, type="l", lwd=2, main="Time to *Apply n Profiles",
xlab="Number of Profiles",ylab="Time, seconds")
lines(res2[,3]~idx.sub, col="GREEN", lwd=2)
legend('topleft', legend = c("profileApply","chunkApply"), lty=1, lwd=2, col=c("BLACK","GREEN"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment