Skip to content

Instantly share code, notes, and snippets.

@fawda123
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fawda123/4fc51c2cb86341ed9291 to your computer and use it in GitHub Desktop.
Save fawda123/4fc51c2cb86341ed9291 to your computer and use it in GitHub Desktop.
estimate metabolism at all SWMP sites
# packages to use
library(SWMPr)
library(httr)
library(XML)
library(foreach)
library(doParallel)
# names of files on server
# request the bucket listing and stop with an informative error if the
# request failed, rather than parsing an error page as if it were a listing
resp_s3 <- httr::GET('https://s3.amazonaws.com/swmpalldata/')
httr::stop_for_status(resp_s3)
files_s3 <- rawToChar(resp_s3$content)

# NOTE(review): the listing is parsed with the HTML parser and queried with
# lower-case element names; presumably this relies on the HTML parser
# lower-casing tags - confirm before switching to an XML parser
files_s3 <- htmlTreeParse(files_s3, useInternalNodes = TRUE)
files_s3 <- xpathSApply(files_s3, '//contents//key', xmlValue)

# drop the file extension so entries can be compared with station codes
files_s3 <- gsub('\\.RData$', '', files_s3)

# find only active water quality and weather sites
# IP address must be registered with CDMO
meta <- site_codes()
sel <- meta$status %in% 'Active' & !grepl('nut$', meta$station_code)
meta <- meta[sel, ]

# get wq sites from meta, then filter by those on server
# (the extension was already removed from files_s3 above)
wq_sites <- grep('wq$', meta$station_code, value = TRUE)
wq_sites <- files_s3[files_s3 %in% wq_sites]
# setup parallel backend for processing
cl <- makeCluster(8)
registerDoParallel(cl)
strt <- Sys.time()

# process
# one list element is produced per entry of wq_sites: the combined and
# processed data for that site, or NULL if no weather file matches it
# the cluster is stopped in 'finally' so the worker processes are released
# whether the loop completes or fails part way through
metabs <- tryCatch(
  foreach(wq_site = wq_sites) %dopar% {

    library(SWMPr)

    # progress
    # sink() is opened without append, so log.txt holds only the most
    # recently written progress message
    sink('log.txt')
    cat(wq_site, which(wq_site == wq_sites), 'of', length(wq_sites), '\n')
    print(Sys.time() - strt)
    sink()

    # find corresponding wx station
    # candidates are server files that start with the first three characters
    # of the wq site name and contain 'met'
    met_site <- substr(wq_site, 1, 3)
    met_site <- grep(paste0('^', met_site, '.*met'), files_s3, value = TRUE)
    met_site <- gsub('\\.RData$', '', met_site)

    # continue if wx data found
    if (length(met_site) > 0) {

      # if > 1 wx site, pick the one with more obs
      if (length(met_site) > 1) {

        met_list <- lapply(met_site, import_remote)
        names(met_list) <- met_site

        # pick the one with most obs
        n_obs <- vapply(met_list, nrow, integer(1))
        met <- met_list[[which.max(n_obs)]]

      # otherwise load only one
      } else {

        met <- import_remote(met_site)

      }

      # load the wq file
      wq <- import_remote(wq_site)

      # combine, estimate metabolism, reduce data volume
      # NOTE(review): only the first row is kept; presumably the metabolism
      # estimates stay attached to the object after subsetting - confirm
      dat <- comb(wq, met, method = attr(met, 'station'))
      dat <- ecometab(dat)
      dat[1, ]

    } else {

      # no weather data on the server for this site
      NULL

    }

  },
  finally = stopCluster(cl)
)

# label results by site and write to disk
names(metabs) <- wq_sites
save(metabs, file = 'metabs.RData')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment