-
-
Save dblodgett-usgs/b1cdd3dd748a4cd53cdb14708b92378c to your computer and use it in GitHub Desktop.
Adds names to unnamed nhdplus flowlines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sf) | |
library(dplyr) | |
data_path <- "~/Documents/Projects/WaterSmart/5_data/NHDPlusV21_National_Seamless.gdb" | |
network <- st_read(data_path, | |
layer = "NHDFlowline_Network", stringsAsFactors = F) | |
net_data <- network | |
st_geometry(net_data) <- NULL | |
net_data <- select(net_data, COMID, GNIS_NAME, GNIS_ID, LevelPathI, Hydroseq, DnHydroseq) %>% | |
arrange(Hydroseq) | |
lpids <- sort(unique(as.numeric(net_data$LevelPathI))) | |
# j is used to manual restarts. | |
j <- 1 | |
for(i in j:length(lpids)) { | |
lpid <- lpids[i] | |
# This may be helpful for use with which() later. | |
lp <- net_data$LevelPathI == lpid | |
# The level path that we are interested in. | |
lp_data <- filter(net_data, lp) | |
# All the unique names along the level path. | |
uNames <- unique(lp_data$GNIS_NAME) | |
if(length(uNames)>1 & " " %in% uNames) { # if we have more than one name and some are missing. | |
# Starting from downstream, we will do one thing until this is false. | |
ds <- TRUE | |
# Just precalculate all the test stuff. | |
missing <- lp_data$GNIS_NAME == " " | |
for(ctmt in 1:nrow(lp_data)) { | |
# If no name down stream but some upstream, extend name downstream? | |
if(ds & missing[ctmt]) { | |
# Grab a good name from the unique names list. | |
# 2 should be the first name upstream of the missing downstream names. | |
lp_data$GNIS_NAME[ctmt] <- paste("Downstream of", uNames[2]) | |
lp_data$GNIS_ID[ctmt] <- paste0("991", lp_data$GNIS_ID[which(lp_data$GNIS_NAME == uNames[2])[1]]) | |
if(!(!uNames[2] == " " & uNames[1] == " ")) { # check to make sure this is valid. | |
stop("downstream name replacement logic is broken") | |
} | |
} | |
# If we found a good name | |
if (!missing[ctmt]) { | |
# Set ds to false | |
ds <- FALSE | |
# Keep track of the latest good name we've seen. | |
ds_name <- lp_data$GNIS_NAME[ctmt] | |
ds_id <- lp_data$GNIS_ID[ctmt] | |
} | |
# If no name and upstream of something. | |
if (!ds & missing[ctmt]) { | |
lp_data$GNIS_NAME[ctmt] <- paste("Upstream of", ds_name) | |
lp_data$GNIS_ID[ctmt] <- paste0("992", ds_id) | |
} | |
} | |
net_data[which(lp),] <- lp_data | |
print(paste("mainstem names", i)) | |
} | |
if(length(uNames) == 1 & " " %in% uNames) { # if all the names are missing | |
# Find the downstream name/id | |
ds_hydroseq <- lp_data$DnHydroseq[which(lp_data$Hydroseq == min(lp_data$Hydroseq))] | |
ds_index <- which(net_data$Hydroseq == ds_hydroseq) | |
ds_name <- net_data$GNIS_NAME[ds_index][1] | |
ds_id <- net_data$GNIS_ID[ds_index][1] | |
if(length(ds_name) == 0) { | |
warning("found a 0 length name") | |
ds_name <- "Unnamed Outlet" | |
ds_id <- " Unnamed Outlet" | |
} | |
# Get rid of the added stuff from the downstream name and id. | |
if(grepl("Tributary of ", ds_name)) { | |
ds_name <- stringr::str_replace(ds_name, "Tributary of ", "") | |
ds_id <- stringr::str_replace(ds_id, "993", "") | |
} | |
if(grepl("Downstream of ", ds_name)) { | |
ds_name <- stringr::str_replace(ds_name, "Downstream of ", "") | |
ds_id <- stringr::str_replace(ds_id, "991", "") | |
} | |
if(grepl("Upstream of ", ds_name)) { | |
ds_name <- stringr::str_replace(ds_name, "Upstream of ", "") | |
ds_id <- stringr::str_replace(ds_id, "992", "") | |
} | |
lp_data$GNIS_NAME <- paste("Tributary of", ds_name) | |
lp_data$GNIS_ID <- paste0("993", ds_id) | |
net_data[which(lp),] <- lp_data | |
print(paste("tributary names", i)) | |
} | |
} | |
# Use this to manual restart on failure. | |
j <- i - 1 | |
names(net_data) <- c("COMID", "NAME", "ID", "LevelPathI", "Hydroseq", "DnHydroseq") | |
write.table(net_data[c("COMID", "NAME")], "modified_nhdplusv2_names.tsv", quote = TRUE, sep = "\t") | |
gids <- data.frame(COMID = network$COMID, GNIS_ID = network$GNIS_ID, stringsAsFactors = F) | |
gids <- filter(gids, GNIS_ID == " ") | |
only_changed <- left_join(gids, net_data, by = "COMID") %>% | |
rename(SOURCE_ID = GNIS_ID, CHANGE_TYPE = ID) %>% | |
select(-LevelPathI, -Hydroseq, -DnHydroseq) %>% | |
mutate(SOURCE_ID = stringr::str_sub(CHANGE_TYPE, start = 4), CHANGE_TYPE = stringr::str_sub(CHANGE_TYPE, end = 3)) %>% | |
mutate(CHANGE_TYPE = ifelse(CHANGE_TYPE == "991", "Downstream", | |
ifelse(CHANGE_TYPE == "992", "Upstream", | |
ifelse(CHANGE_TYPE == "993", "Tributary", "Unknown")))) | |
any(only_changed$CHANGE_TYPE == "Unknown") | |
any(only_changed$NAME == " ") | |
unknown <- filter(only_changed, CHANGE_TYPE == "Unknown") | |
write.table(only_changed, "only_modified_nhdplsv2_names.tsv", quote = TRUE, sep = "\t") | |
write.table(unknown, "unhandled_modified_nhdplsv2_names.tsv", quote = TRUE, sep = "\t") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment