Skip to content

Instantly share code, notes, and snippets.

@djhocking
Created January 15, 2015 21:29
Show Gist options
  • Save djhocking/a9cf393edce6274ec899 to your computer and use it in GitHub Desktop.
Save djhocking/a9cf393edce6274ec899 to your computer and use it in GitHub Desktop.
R code to derive metrics for all catchments
# Get list of unique catchments with daymet data in our database
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname=...)
# con <- dbConnect(drv, dbname=...)
qry <- "SELECT DISTINCT featureid FROM daymet;"
result <- dbSendQuery(con, qry)
catchments <- fetch(result, n=-1)
catchments <- as.character(catchments$featureid)
# get daymet data for a subset of catchments
# Make the code below into a function then loop through all catchments to create derived metrics in chucks of 100-1000
catchmentod = c("730076", "730077")
# retreiveDaymet <- function(catchmentid) {
# connect to database source
db <- src_postgres(dbname='conte_dev',...)
# db <- src_postgres(dbname='conte_dev',...)
n.catches <- length(catchmentid)
if(n.catches == 1) {
tbl_daymet <- tbl(db, 'daymet') %>%
dplyr::filter(featureid == catchmentid) %>%
dplyr::mutate(airTemp = (tmax + tmin)/2) %>%
} else {
tbl_daymet <- tbl(db, 'daymet') %>%
dplyr::filter(featureid %in% catchmentid) %>%
dplyr::mutate(airTemp = (tmax + tmin)/2) %>%
}
# df$site <- paste(df$agency_name, df$location_name, sep='_')
springFallBPs$site <- as.character(springFallBPs$site)
# temp hack
observedData$site <- as.character(observedData$site)
tempData <- left_join(climateData, select(covariateData, -Latitude, -Longitude), by=c('site'))
tempData <- left_join(tempData, dplyr::select(.data = observedData, agency, date, AgencyID, site, temp), by = c("site", "date"))
tempDataBP <- left_join(tempData, springFallBPs, by=c('site', 'year'))
# Clip to syncronized season
# temp hack - eventually need to adjust Kyle's code to substitute huc or other mean breakpoint in when NA
fullDataSync <- tempDataBP %>%
filter(dOY >= finalSpringBP & dOY <= finalFallBP | is.na(finalSpringBP) | is.na(finalFallBP)) %>%
filter(dOY >= mean(finalSpringBP, na.rm = T) & dOY <= mean(finalFallBP, na.rm = T))
rm(covariateData) # save some memory
# Order by group and date
fullDataSync <- fullDataSync[order(fullDataSync$site,fullDataSync$year,fullDataSync$dOY),]
# For checking the order of fullDataSync
fullDataSync$count <- 1:length(fullDataSync$year)
fullDataSync <- fullDataSync[order(fullDataSync$count),] # just to make sure fullDataSync is ordered for the slide function
# airTemp
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -1, NewVar='airTempLagged1')
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -2, NewVar='airTempLagged2')
# prcp
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -1, NewVar='prcpLagged1')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -2, NewVar='prcpLagged2')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -3, NewVar='prcpLagged3')
fullDataSync <- fullDataSync[ , c("agency", "date", "AgencyID", "year", "site", "date", "finalSpringBP", "finalFallBP", "FEATUREID", "HUC4", "HUC8", "HUC12", "temp", "Latitude", "Longitude", "airTemp", "airTempLagged1", "airTempLagged2", "prcp", "prcpLagged1", "prcpLagged2", "prcpLagged3", "dOY", "Forest", "Herbacious", "Agriculture", "Developed", "TotDASqKM", "ReachElevationM", "ImpoundmentsAllSqKM", "HydrologicGroupAB", "SurficialCoarseC", "CONUSWetland", "ReachSlopePCNT", "srad", "dayl", "swe")] #
# Standardize and Add Covariates and Indices for Analysis
fullDataSyncS <- stdCovs(x = fullDataSync, y = tempDataSync, var.names = var.names)
fullDataSyncS <- addInteractions(fullDataSyncS)
fullDataSyncS <- indexDeployments(fullDataSyncS, regional = TRUE)
#firstObsRowsFull <- createFirstRows(fullDataSyncS)
#evalRowsFull <- createEvalRows(fullDataSyncS)
# Make predictions
fullDataSyncS <- mutate(fullDataSyncS, huc = HUC8)
fullDataSyncS <- predictTemp(data = fullDataSyncS, coef.list = coef.list, cov.list = cov.list)
# Add predictions back to original (unscaled) dataframe
fullDataSync <- left_join(fullDataSync, select(fullDataSyncS, site, date, tempPredicted), by = c("site", "date"))
###Derived metrics
derived.site.metrics <- deriveMetrics(fullDataSync)
write.table(derived.site.metrics, file = 'reports/MADEP/derived_site_metrics.csv', sep = ',', row.names = F)
# }
@djhocking
Copy link
Author

Thanks. This looks like it should work for the question I asked. Unfortunately, I realized that I need this for every catchment in the daymet record rather than every catchment (site) in the observed locations table. I can just reverse the order of the left_join but then handle the NA for site in the predict function (which it should already do). I just changed the order of the left_join and added a filter so I can do it in chunks.

  # left join qry_locations to daymet to get daymet timeseries for each unique site
  qry_daymet <- tbl(db, 'daymet') %>%
    left_join(select(qry_locations, site, featureid), by='featureid') %>%
    filter(featureid %in% catchmentid) %>%
    mutate(airTemp = (tmax + tmin)/2)

df_daymet <- collect(qry_daymet)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment