Skip to content

Instantly share code, notes, and snippets.

@dblodgett-usgs
Last active December 2, 2018 20:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dblodgett-usgs/40d77577e2e0328145a116694811b5ab to your computer and use it in GitHub Desktop.
Save dblodgett-usgs/40d77577e2e0328145a116694811b5ab to your computer and use it in GitHub Desktop.
Script to determine what stream gages are upstream of each streamgage in the gage layer included in the NHDPlus V2 database. See comments below for description.
library(nhdplusTools)
library(sf)
library(igraph)
library(dplyr)
library(tidyr)
library(readr)
library(jsonlite)
nhdplus_path("../../4_data/NHDPlusNationalData/NHDPlusV21_National_Seamless.gdb/")
nhd_paths <- stage_national_data()
network <- readRDS(nhd_paths$attributes)
networks <- select(network, COMID, TerminalPa)
network_conditioned <- prepare_nhdplus(network, 0, 0, FALSE) %>%
left_join(networks, by = "COMID")
gages <- read_sf(nhdplus_path(), "Gage")
gages <- select(gages, COMID = FLComID, siteID = SOURCE_FEA) %>%
st_set_geometry(NULL)
# Build catchment and nexus data.frames to preserve sanity in graph traversal.
catchment <- network_conditioned %>%
mutate(toCOMID = ifelse(is.na(toCOMID), 0, toCOMID))
# Join id to toCOMID use COMID as from nexus COMID since we are assuming dendritic.
nexus <- left_join(select(catchment, toCOMID = COMID),
select(catchment, NULL, fromID = COMID, toCOMID),
by = "toCOMID") %>%
mutate(nexID = paste0("nex-", toCOMID),
fromID = paste0("cat-", fromID),
toCOMID = paste0("cat-", toCOMID)) %>%
select(nexID, fromID, toCOMID)
# get fromID and toCOMID straight with "nex-" prefix
catchment <- mutate(catchment,
fromID = paste0("nex-", COMID),
toCOMID = paste0("nex-", toCOMID)) %>%
select(fromID, toCOMID, COMID, TerminalPa)
terms <- unique(catchment$TerminalPa)
all_networks <- rep(list(list()), length(terms))
for(n in seq_along(terms)) {
term <- terms[n]
g <- catchment %>%
filter(TerminalPa == term) %>%
select(fromID, toCOMID, COMID) %>%
graph_from_data_frame(directed = TRUE)
search <- filter(gages, COMID %in% edge.attributes(g)$COMID)$COMID
connected <- rep(list(list()), length(search))
if(length(search) > 0) {
# Vertices upstream of a catchment have the catchment id in them.
Vs_names <- names(V(g))
Vs <- which(Vs_names %in% paste0("nex-", as.character(search)))
Vs_names <- Vs_names[Vs]
for(i in seq_len(length(Vs))) {
paths <- all_simple_paths(g, Vs[i], Vs, mode = "in")
for(p in seq_len(length(paths))) {
connected[[i]] <- c(connected[[i]], Vs_names[which(Vs_names %in% names(paths[[p]]))])
}
connected[[i]] <- unique(connected[[i]])
connected[[i]] <- connected[[i]][!connected[[i]] == Vs_names[i]]
connected[[i]] <- as.integer(gsub("nex-", "", connected[[i]]))
}
names(connected) <- gsub("nex-", "", Vs_names)
}
if(n %% 100==0) {
cat(paste0("iteration: ", n, " of ", length(terms), "\n"))
}
all_networks[[n]] <- connected
}
saveRDS(all_networks, "all_networks.rds")
all_networks <- readRDS("all_networks.rds")
all_networks <- do.call(c, all_networks)
all_networks <- data.frame(outlet = names(all_networks),
upstream = I(unname(all_networks)),
stringsAsFactors = FALSE)
all_networks <- all_networks[which(lengths(all_networks$upstream) != 0),]
all_networks <- unnest(all_networks) %>%
mutate(outlet = as.integer(outlet))
all_networks <- left_join(all_networks, select(gages, COMID, outlet_siteID = siteID), by = c("outlet" = "COMID"))
all_networks <- left_join(all_networks, select(gages, COMID, upstream_siteID = siteID), by = c("upstream" = "COMID"))
all_networks <- select(all_networks, outlet_siteID, upstream_siteID)
write.table(all_networks, "all_upstream.tsv", quote = TRUE, sep = "\t", row.names = F)
all_networks <- split(all_networks$upstream_siteID, all_networks$outlet_siteID)
write_json(all_networks, "all_upstream.json", pretty = TRUE)
zip("find_upstream_gages_outputs.zip", files = c("all_upstream.json", "all_upstream.tsv"))
@dblodgett-usgs
Copy link
Author

This code treats the NHDPlus catchment network as an edge-node graph where catchments (the union of catchment and flowline identified by a COMID) are considered to be edges.

iGraph can take an edge list as created on this line.

The code creates a sub-network for each discrete network in the dataset based on terminal path IDs.

It selects the potentially relevant gages.

Then iterates over each gage finding all paths that connect to upstream potentially relevant gages.

While the complete path (all catchment ids between two connected gages) is returned the code just selects the catchment the gage is indexed to and throws the rest away.

Since siteIDs are not used in the graph traversal, the results need to be joined back to the siteIDs and written out.

Good fun!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment