@djhocking
Created January 15, 2015 21:29
R code to derive metrics for all catchments
# Get list of unique catchments with daymet data in our database
drv <- dbDriver("PostgreSQL")
con <- dbConnect(drv, dbname=...)
qry <- "SELECT DISTINCT featureid FROM daymet;"
result <- dbSendQuery(con, qry)
catchments <- fetch(result, n=-1)
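dbClearResult(result)  # free the result set (not in the original gist; standard DBI/RPostgreSQL cleanup)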
catchments <- as.character(catchments$featureid)
# get daymet data for a subset of catchments
# Make the code below into a function, then loop through all catchments to create derived metrics in chunks of 100-1000
catchmentid <- c("730076", "730077")
# retrieveDaymet <- function(catchmentid) {
# connect to database source
db <- src_postgres(dbname='conte_dev',...)
n.catches <- length(catchmentid)
if(n.catches == 1) {
  tbl_daymet <- tbl(db, 'daymet') %>%
    dplyr::filter(featureid == catchmentid) %>%
    dplyr::mutate(airTemp = (tmax + tmin)/2)
} else {
  tbl_daymet <- tbl(db, 'daymet') %>%
    dplyr::filter(featureid %in% catchmentid) %>%
    dplyr::mutate(airTemp = (tmax + tmin)/2)
}
# df$site <- paste(df$agency_name, df$location_name, sep='_')
springFallBPs$site <- as.character(springFallBPs$site)
# temp hack
observedData$site <- as.character(observedData$site)
tempData <- left_join(climateData, select(covariateData, -Latitude, -Longitude), by=c('site'))
tempData <- left_join(tempData, dplyr::select(.data = observedData, agency, date, AgencyID, site, temp), by = c("site", "date"))
tempDataBP <- left_join(tempData, springFallBPs, by=c('site', 'year'))
# Clip to synchronized season
# temp hack - eventually need to adjust Kyle's code to substitute huc or other mean breakpoint in when NA
fullDataSync <- tempDataBP %>%
  filter((dOY >= finalSpringBP & dOY <= finalFallBP) | is.na(finalSpringBP) | is.na(finalFallBP)) %>%
  filter(dOY >= mean(finalSpringBP, na.rm = T) & dOY <= mean(finalFallBP, na.rm = T))
rm(covariateData) # save some memory
# Order by group and date
fullDataSync <- fullDataSync[order(fullDataSync$site,fullDataSync$year,fullDataSync$dOY),]
# For checking the order of fullDataSync
fullDataSync$count <- 1:length(fullDataSync$year)
fullDataSync <- fullDataSync[order(fullDataSync$count),] # just to make sure fullDataSync is ordered for the slide function
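# slide() below is from the DataCombine package: it adds a lagged copy of Var
# within each GroupVar group; slideBy = -1 shifts values back one row (one day here)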
# airTemp
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -1, NewVar='airTempLagged1')
fullDataSync <- slide(fullDataSync, Var = "airTemp", GroupVar = "site", slideBy = -2, NewVar='airTempLagged2')
# prcp
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -1, NewVar='prcpLagged1')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -2, NewVar='prcpLagged2')
fullDataSync <- slide(fullDataSync, Var = "prcp", GroupVar = "site", slideBy = -3, NewVar='prcpLagged3')
fullDataSync <- fullDataSync[ , c("agency", "date", "AgencyID", "year", "site", "finalSpringBP", "finalFallBP", "FEATUREID", "HUC4", "HUC8", "HUC12", "temp", "Latitude", "Longitude", "airTemp", "airTempLagged1", "airTempLagged2", "prcp", "prcpLagged1", "prcpLagged2", "prcpLagged3", "dOY", "Forest", "Herbacious", "Agriculture", "Developed", "TotDASqKM", "ReachElevationM", "ImpoundmentsAllSqKM", "HydrologicGroupAB", "SurficialCoarseC", "CONUSWetland", "ReachSlopePCNT", "srad", "dayl", "swe")]
# Standardize and Add Covariates and Indices for Analysis
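# stdCovs, addInteractions, indexDeployments, predictTemp, and deriveMetrics are
# project-specific helpers (presumably from the Conte stream temperature model code)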
fullDataSyncS <- stdCovs(x = fullDataSync, y = tempDataSync, var.names = var.names)
fullDataSyncS <- addInteractions(fullDataSyncS)
fullDataSyncS <- indexDeployments(fullDataSyncS, regional = TRUE)
#firstObsRowsFull <- createFirstRows(fullDataSyncS)
#evalRowsFull <- createEvalRows(fullDataSyncS)
# Make predictions
fullDataSyncS <- mutate(fullDataSyncS, huc = HUC8)
fullDataSyncS <- predictTemp(data = fullDataSyncS, coef.list = coef.list, cov.list = cov.list)
# Add predictions back to original (unscaled) dataframe
fullDataSync <- left_join(fullDataSync, select(fullDataSyncS, site, date, tempPredicted), by = c("site", "date"))
### Derived metrics
derived.site.metrics <- deriveMetrics(fullDataSync)
write.table(derived.site.metrics, file = 'reports/MADEP/derived_site_metrics.csv', sep = ',', row.names = F)
# }
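# Sketch of the chunked loop described at the top (not in the original gist):
# assumes the code above is wrapped into the commented-out retrieveDaymet()
# function, and processes catchments in chunks of ~500.
# chunk.size <- 500
# catchment.chunks <- split(catchments, ceiling(seq_along(catchments) / chunk.size))
# all.metrics <- lapply(catchment.chunks, retrieveDaymet)
# derived.site.metrics <- do.call(rbind, all.metrics)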
@walkerjeffd

This will probably work; I didn't run the last few lines myself because they'll probably take a while:

# create a query that lists all locations and creates 'site' column
qry_locations <- tbl(db, 'locations') %>%
  rename(location_id=id, location_name=name, featureid=catchment_id) %>%
  select(location_id, location_name, latitude, longitude, featureid, agency_id) %>%
  left_join(tbl(db, 'agencies') %>% rename(agency_name=name, agency_id=id) %>% select(agency_name, agency_id)) %>%
  select(location_id, location_name, featureid, latitude, longitude, agency_name) %>%
  mutate(site=concat(agency_name, '_', location_name))
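  # note: concat() is Postgres SQL; dplyr passes it through in the translated query
  # (on a collected local data frame you'd use paste0() instead)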

head(qry_locations)

# check that all sites are unique
stopifnot(sum(duplicated(collect(qry_locations)$site))==0)

# number of duplicated featureids > 0 (this is ok)
sum(duplicated(collect(qry_locations)$featureid))

# left join qry_locations to daymet to get daymet timeseries for each unique site
qry_daymet <- select(qry_locations, site, featureid) %>%
  left_join(tbl(db, 'daymet'), by='featureid') %>%
  mutate(airTemp = (tmax + tmin)/2)

tbl_daymet <- collect(qry_daymet)

The idea is to first create a query that lists all the locations and adds the site column. Then take just the site and featureid columns from that table, and left join with daymet. The end result should be a table with columns [site, featureid, date, tmax, tmin, prcp, dayl, srad, vp, swe, airTemp] that has the daily daymet values for each unique site.
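
A quick sanity check on the collected result (a sketch, assuming tbl_daymet from above):

expected_cols <- c("site", "featureid", "date", "tmax", "tmin", "prcp",
                   "dayl", "srad", "vp", "swe", "airTemp")
stopifnot(all(expected_cols %in% names(tbl_daymet)))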

@djhocking
Author

Thanks. This looks like it should work for the question I asked. Unfortunately, I realized that I need this for every catchment in the daymet record, not just every catchment (site) in the observed locations table. I can just reverse the order of the left_join, but then I have to handle the NA values for site in the predict function (which it should already do). I changed the order of the left_join and added a filter so I can do it in chunks.

  # left join qry_locations to daymet to get daymet timeseries for each unique site
  qry_daymet <- tbl(db, 'daymet') %>%
    left_join(select(qry_locations, site, featureid), by='featureid') %>%
    filter(featureid %in% catchmentid) %>%
    mutate(airTemp = (tmax + tmin)/2)

df_daymet <- collect(qry_daymet)
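
One hedged way to handle those NA sites after the reversed join (my sketch, not from the gist; the "catchment_" prefix is invented here): catchments with no observed location get NA for site, so give them a placeholder before prediction.

df_daymet$site <- ifelse(is.na(df_daymet$site),
                         paste0("catchment_", df_daymet$featureid),  # hypothetical placeholder naming
                         df_daymet$site)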
