Created
January 19, 2020 02:42
-
-
Save brownag/99ad3bb30e960ff281b7d382c84b82b4 to your computer and use it in GitHub Desktop.
chunkApply -- new version of profileApply that scales linearly with tens of thousands of profiles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
chunkApply <- function(object, FUN, | |
simplify = TRUE, | |
frameify = FALSE, | |
chunk.size = 100, | |
column.names = NULL, | |
...) { | |
if(simplify & frameify) { | |
message("simplify and frameify are both TRUE -- ignoring simplify argument", .call=FALSE) | |
simplify <- FALSE | |
} | |
# break SPC of size n into chunk.size chunks | |
n <- length(object) | |
chunk <- sort(1:n %% max(1, round(n / chunk.size))) + 1 | |
# operate on chunks | |
res <- do.call('c', lapply(split(1:n, chunk), function(idx) { | |
l <- lapply(as.list(idx), function(i) do.call(FUN, list(object[i, ], ...))) | |
if(simplify) | |
return(unlist(l)) | |
return(l) | |
})) | |
# return profile IDs if it makes sense for result | |
if(length(res) == length(object)) { | |
id.name <- idname(object) | |
names(res) <- profile_id(object) | |
} | |
# return horizon IDs if it makes sense for result | |
if(length(res) == nrow(object)) { | |
id.name <- hzidname(object) | |
names(res) <- hzID(object) | |
} | |
if(frameify) { | |
if(is.data.frame(res[[1]])) { | |
res <- do.call('rbind', res) | |
if(nrow(res) == nrow(object)) { | |
res <- as.data.frame(cbind(denormalize(object, idname(object)), | |
hzID(object), | |
res), | |
row.names = NULL) | |
oldnames <- colnames(res) | |
colnames(res) <- c(idname(object), hzidname(object), oldnames[3:length(oldnames)]) | |
} else if(nrow(res) == length(object)) { | |
res <- as.data.frame(cbind(denormalize(object, | |
idname(object)), | |
res), | |
row.names = NULL) | |
oldnames <- colnames(res) | |
colnames(res) <- c(idname(object), oldnames[2:length(oldnames)]) | |
} else { | |
res <- as.data.frame(res) | |
} | |
if(!is.null(column.names)) | |
colnames(res) <- column.names | |
} else { | |
message("first result is not class `data.frame` and frameify is TRUE. defaulting to list output.", .call=FALSE) | |
} | |
} | |
return(res) | |
} | |
library(aqp) | |
foo <- do.call('rbind', lapply(as.list(1:10000), random_profile)) | |
depths(foo) <- id ~ top + bottom | |
simpleFunction <- function(p) { | |
hz <- horizons(p) | |
(hz[,3] - hz[,2])[1] | |
#res <- data.frame(profile_id(p), hzID(p), thk=(hz[,3] - hz[,2])) | |
#colnames(res) <- c(idname(p), hzidname(p), 'hz_thickness') | |
#return(res) | |
} | |
c1 <- system.time(goo <- chunkApply(foo[1:100,], simpleFunction, simplify = T)) | |
cp1 <- system.time(chunkApply(foo[1:100,], simpleFunction, parallel=T)) | |
p1 <- system.time(profileApply(foo[1:100,], simpleFunction, simplify = FALSE)) | |
c2 <- system.time(chunkApply(foo[1:1000,], simpleFunction)) | |
cp2 <- system.time(chunkApply(foo[1:1000,], simpleFunction, parallel=T)) | |
p2 <- system.time(profileApply(foo[1:1000,], simpleFunction, simplify = FALSE)) | |
c3 <- system.time(chunkApply(foo[1:10000,], simpleFunction)) | |
cp3 <- system.time(chunkApply(foo[1:10000,], simpleFunction, parallel=T)) | |
c4 <- system.time(chunkApply(foo, simpleFunction)) | |
cp4 <- system.time(chunkApply(foo, simpleFunction, parallel=T)) | |
# too dang long | |
# p3 <- system.time(profileApply(foo, simpleFunction, simplify = FALSE)) | |
c1 | |
cp1 | |
p1 | |
c2 | |
cp2 | |
p2 | |
c3 | |
cp3 | |
p3 | |
library(aqp) | |
foo <- do.call('rbind', lapply(as.list(1:100000), random_profile)) | |
depths(foo) <- id ~ top + bottom | |
simpleFunction <- function(p) data.frame(horizons(p)[2,2:3]) | |
idx <- c(seq(100,900,100), seq(1000,10000,1000), seq(10000, 100000, 10000)) | |
idx.sub <- idx[idx <= 10000] | |
res <- do.call('rbind', lapply(as.list(idx.sub), function(i) { | |
system.time(profileApply(foo[1:i,], simpleFunction, simplify=F)) | |
})) | |
res2 <- do.call('rbind', lapply(as.list(idx.sub), function(i) { | |
system.time(chunkApply(foo[1:i,], simpleFunction)) | |
})) | |
plot(res[,3]~idx.sub, type="l", lwd=2, main="Time to *Apply n Profiles", | |
xlab="Number of Profiles",ylab="Time, seconds") | |
lines(res2[,3]~idx.sub, col="GREEN", lwd=2) | |
legend('topleft', legend = c("profileApply","chunkApply"), lty=1, lwd=2, col=c("BLACK","GREEN")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment