# estimate metabolism at all SWMP sites
# packages to use
library(SWMPr)       # site_codes, import_remote, comb, ecometab
library(httr)        # GET for the bucket listing
library(XML)         # htmlTreeParse, xpathSApply, xmlValue
library(foreach)     # foreach / %dopar%
library(doParallel)  # makeCluster, registerDoParallel
# names of files on server
# the bucket listing is fetched as a document whose 'key' nodes hold one file
# name each
bucket_url <- 'https://s3.amazonaws.com/swmpalldata/'
bucket_resp <- httr::GET(bucket_url)

# stop on an HTTP error instead of parsing an error body into an empty file
# list, which would make every later step silently find no sites
httr::stop_for_status(bucket_resp)

bucket_doc <- htmlTreeParse(
  rawToChar(bucket_resp$content),
  useInternalNodes = TRUE
)
files_s3 <- xpathSApply(bucket_doc, '//contents//key', xmlValue)

# drop the file extension so names can be compared with station codes
files_s3 <- gsub('\\.RData$', '', files_s3)
# find only active water quality and weather sites
# IP address must be registered with CDMO
meta <- site_codes()
is_active <- meta$status %in% 'Active'
is_nutrient <- grepl('nut$', meta$station_code)
meta <- meta[is_active & !is_nutrient, ]

# get wq sites from meta, then filter by those on server
# the extension is stripped again here so the match works whether or not
# files_s3 still carries '.RData'
wq_codes <- grep('wq$', meta$station_code, value = TRUE)
on_server <- gsub('\\.RData$', '', files_s3) %in% wq_codes
wq_sites <- files_s3[on_server]
# setup parallel backend for processing
cl <- makeCluster(8)
registerDoParallel(cl)
strt <- Sys.time()

# process
# returns a list with one element per entry of wq_sites: the combined and
# processed data for that site, or NULL when no weather file matches it
# SWMPr is attached on each worker through .packages
metabs <- foreach(wq_site = wq_sites, .packages = 'SWMPr') %dopar% {

  # progress
  # each iteration reopens log.txt without appending, so the file holds only
  # the output of the iteration that wrote last
  sink('log.txt')
  cat(wq_site, which(wq_site == wq_sites), 'of', length(wq_sites), '\n')
  print(Sys.time() - strt)
  sink()

  # find corresponding wx station
  # the first three characters of the wq name are used as the prefix that a
  # weather file name must share
  reserve <- substr(wq_site, 1, 3)
  met_site <- grep(paste0('^', reserve, '.*met'), files_s3, value = TRUE)
  met_site <- gsub('\\.RData$', '', met_site)

  # continue if wx data found
  if (length(met_site) > 0) {

    if (length(met_site) > 1) {

      # if > 1 wx site, load them all and pick the one with most obs
      # vapply fails loudly if an import does not return a table with rows
      met_list <- lapply(met_site, import_remote)
      n_obs <- vapply(met_list, nrow, integer(1))
      met <- met_list[[which.max(n_obs)]]

    } else {

      # otherwise load only one
      met <- import_remote(met_site)

    }

    # load the wq file
    wq <- import_remote(wq_site)

    # combine, estimate metabolism, reduce data volume
    dat <- comb(wq, met, method = attr(met, 'station'))
    dat <- ecometab(dat)

    # keep only the first row
    # NOTE(review): presumably the metabolism estimates travel with the object
    # as an attribute rather than as rows -- confirm against SWMPr::ecometab
    dat[1, ]

  } else {

    # no wx data for this site; keep a NULL placeholder so the result list
    # stays aligned with wq_sites
    NULL

  }

}

# release the worker processes now that all sites are done
stopCluster(cl)
# label each result with its wq site, then write the whole list to disk
metabs <- setNames(metabs, wq_sites)
save(metabs, file = 'metabs.RData')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment