Skip to content

Instantly share code, notes, and snippets.

@dblodgett-usgs
Last active February 9, 2018 13:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dblodgett-usgs/b1cdd3dd748a4cd53cdb14708b92378c to your computer and use it in GitHub Desktop.
Save dblodgett-usgs/b1cdd3dd748a4cd53cdb14708b92378c to your computer and use it in GitHub Desktop.
Adds names to unnamed nhdplus flowlines
library(sf)
library(dplyr)
data_path <- "~/Documents/Projects/WaterSmart/5_data/NHDPlusV21_National_Seamless.gdb"
network <- st_read(data_path,
layer = "NHDFlowline_Network", stringsAsFactors = F)
net_data <- network
st_geometry(net_data) <- NULL
net_data <- select(net_data, COMID, GNIS_NAME, GNIS_ID, LevelPathI, Hydroseq, DnHydroseq) %>%
arrange(Hydroseq)
lpids <- sort(unique(as.numeric(net_data$LevelPathI)))
# j is used to manual restarts.
j <- 1
for(i in j:length(lpids)) {
lpid <- lpids[i]
# This may be helpful for use with which() later.
lp <- net_data$LevelPathI == lpid
# The level path that we are interested in.
lp_data <- filter(net_data, lp)
# All the unique names along the level path.
uNames <- unique(lp_data$GNIS_NAME)
if(length(uNames)>1 & " " %in% uNames) { # if we have more than one name and some are missing.
# Starting from downstream, we will do one thing until this is false.
ds <- TRUE
# Just precalculate all the test stuff.
missing <- lp_data$GNIS_NAME == " "
for(ctmt in 1:nrow(lp_data)) {
# If no name down stream but some upstream, extend name downstream?
if(ds & missing[ctmt]) {
# Grab a good name from the unique names list.
# 2 should be the first name upstream of the missing downstream names.
lp_data$GNIS_NAME[ctmt] <- paste("Downstream of", uNames[2])
lp_data$GNIS_ID[ctmt] <- paste0("991", lp_data$GNIS_ID[which(lp_data$GNIS_NAME == uNames[2])[1]])
if(!(!uNames[2] == " " & uNames[1] == " ")) { # check to make sure this is valid.
stop("downstream name replacement logic is broken")
}
}
# If we found a good name
if (!missing[ctmt]) {
# Set ds to false
ds <- FALSE
# Keep track of the latest good name we've seen.
ds_name <- lp_data$GNIS_NAME[ctmt]
ds_id <- lp_data$GNIS_ID[ctmt]
}
# If no name and upstream of something.
if (!ds & missing[ctmt]) {
lp_data$GNIS_NAME[ctmt] <- paste("Upstream of", ds_name)
lp_data$GNIS_ID[ctmt] <- paste0("992", ds_id)
}
}
net_data[which(lp),] <- lp_data
print(paste("mainstem names", i))
}
if(length(uNames) == 1 & " " %in% uNames) { # if all the names are missing
# Find the downstream name/id
ds_hydroseq <- lp_data$DnHydroseq[which(lp_data$Hydroseq == min(lp_data$Hydroseq))]
ds_index <- which(net_data$Hydroseq == ds_hydroseq)
ds_name <- net_data$GNIS_NAME[ds_index][1]
ds_id <- net_data$GNIS_ID[ds_index][1]
if(length(ds_name) == 0) {
warning("found a 0 length name")
ds_name <- "Unnamed Outlet"
ds_id <- " Unnamed Outlet"
}
# Get rid of the added stuff from the downstream name and id.
if(grepl("Tributary of ", ds_name)) {
ds_name <- stringr::str_replace(ds_name, "Tributary of ", "")
ds_id <- stringr::str_replace(ds_id, "993", "")
}
if(grepl("Downstream of ", ds_name)) {
ds_name <- stringr::str_replace(ds_name, "Downstream of ", "")
ds_id <- stringr::str_replace(ds_id, "991", "")
}
if(grepl("Upstream of ", ds_name)) {
ds_name <- stringr::str_replace(ds_name, "Upstream of ", "")
ds_id <- stringr::str_replace(ds_id, "992", "")
}
lp_data$GNIS_NAME <- paste("Tributary of", ds_name)
lp_data$GNIS_ID <- paste0("993", ds_id)
net_data[which(lp),] <- lp_data
print(paste("tributary names", i))
}
}
# Use this to manual restart on failure.
j <- i - 1
names(net_data) <- c("COMID", "NAME", "ID", "LevelPathI", "Hydroseq", "DnHydroseq")
write.table(net_data[c("COMID", "NAME")], "modified_nhdplusv2_names.tsv", quote = TRUE, sep = "\t")
gids <- data.frame(COMID = network$COMID, GNIS_ID = network$GNIS_ID, stringsAsFactors = F)
gids <- filter(gids, GNIS_ID == " ")
only_changed <- left_join(gids, net_data, by = "COMID") %>%
rename(SOURCE_ID = GNIS_ID, CHANGE_TYPE = ID) %>%
select(-LevelPathI, -Hydroseq, -DnHydroseq) %>%
mutate(SOURCE_ID = stringr::str_sub(CHANGE_TYPE, start = 4), CHANGE_TYPE = stringr::str_sub(CHANGE_TYPE, end = 3)) %>%
mutate(CHANGE_TYPE = ifelse(CHANGE_TYPE == "991", "Downstream",
ifelse(CHANGE_TYPE == "992", "Upstream",
ifelse(CHANGE_TYPE == "993", "Tributary", "Unknown"))))
any(only_changed$CHANGE_TYPE == "Unknown")
any(only_changed$NAME == " ")
unknown <- filter(only_changed, CHANGE_TYPE == "Unknown")
write.table(only_changed, "only_modified_nhdplsv2_names.tsv", quote = TRUE, sep = "\t")
write.table(unknown, "unhandled_modified_nhdplsv2_names.tsv", quote = TRUE, sep = "\t")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment