require(plyr)
require(stringr)
require(dataRetrieval)
require(geoknife)
library(zoo)
library(imputeTS)
library(lubridate)
library(bsts)
library(dplyr)
library(ggplot2)
library(readr)
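# Workflow: pull groundwater / streamflow / stage observations from NWIS for the sites
# below, aggregate them to monthly values, assemble PRISM precipitation and climate-index
# regressors, and fit a Bayesian structural time series (bsts) model that holds out the
# last `forecastyr` years for evaluation and forecasting.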
######Enter site numbers to be evaluated
forecast_sites <-c("441833089315601")
predictor<-TRUE     #set FALSE to fit the model without a predictor-site regressor
#predictor<-FALSE
predictor_sites <-c("444709089265301")
conf_int<-95        #confidence interval (%) for the forecast ribbon
cred_int<-50        #credible interval (%) from the posterior predictive distribution
forecastyr<-3       #holdout/forecast horizon in years
if(predictor == TRUE){site_no<-c(forecast_sites, predictor_sites)} else {site_no<-c(forecast_sites)}
#####Get data for site numbers
data_avail<-whatNWISdata(siteNumber = c(site_no), parameterCd = c("72019", "00060", "00065"))
site_desc<-readNWISsite(c(site_no))
if(any(data_avail$data_type_cd =="gw")){gw_data<-readNWISgwl(site_no, convertType = FALSE)[,c(4, 7, 2)]}
if(exists("gw_data")){gw_data$lev_dt<-as.Date(gw_data$lev_dt, "%Y-%m-%d")}
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="72019")){dv_gw_data<-readNWISdv(site_no, parameterCd = "72019" , statCd = "00001")[,c(3, 4, 2)]}
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="00060")){dv_cfs_data<-readNWISdv(site_no, parameterCd = "00060" , statCd = "00003")[,c(3, 4, 2)]}
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="00065")){dv_lvl_data<-readNWISdv(site_no, parameterCd = "00065" , statCd = "00003")[,c(3, 4, 2)]}
if(exists("gw_data")) {colnames(gw_data)<-c("Date", "OBS", "site_no")}
if(exists("dv_gw_data")) {colnames(dv_gw_data)<-c("Date", "OBS", "site_no")}
if(exists("dv_cfs_data")) {colnames(dv_cfs_data)<-c("Date", "OBS", "site_no")}
if(exists("dv_lvl_data")) {colnames(dv_lvl_data)<-c("Date", "OBS", "site_no")}
if(exists("gw_data") & exists("dv_gw_data")) {gwl_data<-rbind(gw_data, dv_gw_data)}
if(exists("gw_data") & exists("dv_gw_data")==FALSE) {gwl_data<-gw_data}
if(exists("gw_data")==FALSE & exists("dv_gw_data")) {gwl_data<-dv_gw_data}
if(exists("gw_data")) {colnames(gwl_data)<-c("Date", "OBS", "site_no")}
if(exists("gwl_data")){gwl_data$OBS<-as.numeric(gwl_data$OBS)}
if(exists("gwl_data")){gwl_data$OBS<-gwl_data$OBS*(-1)}
if(exists("gwl_data")){gwl_data$Unit<-"Ft_BLS"}
if(exists("gwl_data")){gwl_data<-join(gwl_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]}
if(exists("gwl_data")){gwl_data$Wat_elev<-gwl_data$alt_va+gwl_data$OBS}
if(exists("dv_cfs_data")){dv_cfs_data$Unit<-"CFS"}
if(exists("dv_cfs_data")){dv_cfs_data<-join(dv_cfs_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]}
if(exists("dv_cfs_data")){dv_cfs_data$Wat_elev<-NA}
if(exists("dv_lvl_data")){dv_lvl_data$Unit<-"Elev_Ft"}
if(exists("dv_lvl_data")){dv_lvl_data<-join(dv_lvl_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]}
if(exists("dv_lvl_data")){dv_lvl_data$Wat_elev<-dv_lvl_data$alt_va+dv_lvl_data$OBS}
if(exists("dv_cfs_data")) {dv_cfs_data$OBS<-log(dv_cfs_data$OBS)}
lvl_obs<-rbind(if(exists("gwl_data"))gwl_data, if(exists("dv_cfs_data"))dv_cfs_data, if(exists("dv_lvl_data"))dv_lvl_data)
lvl_obs$DateYM<-str_sub(lvl_obs$Date, 1, 7)
lvl_obs$OBS[which(lvl_obs$OBS == -999999)]<-NA  #recode NWIS missing-value flag; which() avoids errors when OBS already contains NA
#####Combine level data into one monthly table
obs_data_monthly<-aggregate(OBS ~ DateYM + site_no + Unit, data = lvl_obs, FUN = mean)
colnames(obs_data_monthly)<-c("DateYM", "site_no","Unit", "OBS")
obs_data_monthly$DateYMD<-as.Date(paste(obs_data_monthly$DateYM, "-01", sep=""))
obs_data_monthly<-as.data.frame(obs_data_monthly%>%
group_by(site_no)%>%
mutate(z_score=(OBS-mean(OBS))/sd(OBS)))
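# z_score standardizes each site's monthly record (mean 0, sd 1) so sites reported in
# different units (ft below land surface, log CFS, stage elevation) are comparable.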
#####Create data for potential stencils derived from site data
mean_site_long<-as.numeric(site_desc %>% filter(site_no == forecast_sites) %>% summarize(mean(dec_long_va)))
mean_site_lat<- as.numeric(site_desc %>% filter(site_no == forecast_sites) %>% summarize(mean(dec_lat_va)))
huc8_stencil<-paste("HUC8::", paste(site_desc$huc_cd, collapse = ', '))
#####Get monthly PPT data for selected stencil - comment out unused stencils
#stencil<-webgeom("HUC8:: 07070002") #selected huc8
#stencil<-webgeom(huc8_stencil) #returns the average for all huc8s in the site descriptions
stencil<-simplegeom(c(mean_site_long, mean_site_lat)) #takes the mean of the lat/longs selected
#stencil<-simplegeom(c(-89.8, 44.7))
#stencil<-webgeom("state::Wisconsin")
ppt_job<-geoknife(stencil, (list(times = as.POSIXct(c('1895-01-01','2018-01-01')),url = 'http://cida.usgs.gov/thredds/dodsC/prism_v2', variables = 'ppt')), wait = TRUE)
ppt_data = result(ppt_job)[,c(1,2)]
colnames(ppt_data)<-c("Date", "PPT_MM")
ppt_data$Date<-as.Date(as.POSIXct(ppt_data$Date))
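# geoknife returns the areal statistic (mean, by default) of monthly PRISM precipitation,
# in millimeters, over the chosen stencil for the requested 1895-01-01 to 2018-01-01 window.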
#####calculate and add cumulative deviation from 10yr mean variables
ppt_data$mean10yr<- rollmean(ppt_data$PPT_MM, k=96, align = "right", fill = NA, na.rm=TRUE)
ppt_data$mo_dev10<-(ppt_data$PPT_MM-ppt_data$mean10yr)
ppt_data$mo_dev10[is.na(ppt_data$mo_dev10)]<-0
ppt_data$sum_dev10<-cumsum(ppt_data$mo_dev10)
ppt_data$sum_dev10_Z<-((ppt_data$sum_dev10-mean(ppt_data$sum_dev10))/sd(ppt_data$sum_dev10))
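# sum_dev10 is the cumulative departure of monthly precipitation from its trailing
# rolling mean, and sum_dev10_Z is its z-score; this "CMDV" series is the main
# precipitation regressor (ppt_cmdv) in the BSTS model below.
# Note: rollmean(k = 96) is an 8-year window even though the names say 10yr;
# a true 10-year window would be k = 120.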
ppt_data$site<-"Precip"
ppt_data$Unit<-"MM"
#####************
#ppt_data <- read_csv("~/DNR/Research/Water Levels/ppt_data.csv")
#obs_data_monthly <- read_csv("~/DNR/Research/Water Levels/obs_data_monthly.csv")
Climate_indices <- read_csv("~/Documents/Water Level Data/Climate_indices.csv")
Climate_indices$month<-as.Date(paste(Climate_indices$yyyy, Climate_indices$mm, "01", sep = "-"))
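# Climate_indices.csv is assumed to hold monthly climate/teleconnection indices with
# integer yyyy and mm columns plus one column per index; the index columns are joined
# to reg_data by month and offered to bsts as candidate regressors via its
# spike-and-slab variable selection (expected.model.size).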
test_station_name<-subset(site_desc, site_no==forecast_sites, select=station_nm)
LVL_BSTS <- obs_data_monthly %>%
filter(site_no==forecast_sites) %>%
mutate(yr = year(DateYMD)) %>%
group_by(yr) %>%
filter(DateYMD >= min(obs_data_monthly$DateYMD[month(obs_data_monthly$DateYMD)==1])) %>%
select(month = DateYMD, obs = OBS, z_score, yr)
minfullyr_obs<-min(LVL_BSTS$month[month(LVL_BSTS$month)==1])
reg_data <- ppt_data %>%
filter(Date >= minfullyr_obs) %>%
select(month = Date, obs = PPT_MM, cmdv = sum_dev10)
if(predictor == TRUE){reg_data<- left_join(reg_data,
obs_data_monthly%>%
filter(site_no == predictor_sites) %>%
select(month=DateYMD, obs = OBS),
by ="month" )}
if(predictor == TRUE){colnames(reg_data)<-c("month", "ppt_obs", "ppt_cmdv", "lvl_reg")} else {colnames(reg_data)<-c("month", "ppt_obs", "ppt_cmdv")}
Climate_indices<-Climate_indices %>% select(-yyyy, -mm)
reg_data<-left_join(reg_data, Climate_indices, by = "month")
ppt_start<-as.numeric(min(year(reg_data$month)))
lvl_start_yr<-as.numeric(year(minfullyr_obs))
lvl_start_mo<-1
lvl_end_date<-as.Date(max(reg_data$month))
lvl_end_yr<-as.numeric(year(max(reg_data$month)))
lvl_end_mo<-as.numeric(month(max(reg_data$month)))
reg_zoo<-zoo(reg_data$ppt_cmdv, reg_data$month)
##########Interpolate missing level data and convert to time series
lvl_zoo<- zoo(LVL_BSTS$obs, LVL_BSTS$month)
date_range<- seq.Date(
  as.Date(format(min(LVL_BSTS$month), "%Y-%m-01")),
  as.Date(format(max(LVL_BSTS$month), "%Y-%m-01")),
  by = "month")
date_range<-data.frame(date_range)
colnames(date_range)<-c("month")
lvl_zoo <- merge(
x=date_range,
y=LVL_BSTS,
all.x=TRUE)
lvl_zoo <- na.interpolation(lvl_zoo, option = "linear")
lvl_ts<- ts(select(lvl_zoo, obs),
frequency = 12,
start = c(lvl_start_yr,lvl_start_mo))
reg_zoo<-merge(
x=date_range,
y=reg_data,
all.x=TRUE)
reg_zoo <- na.interpolation(reg_zoo, option = "linear")
# ppt_ts<- ts(select(reg_zoo, ppt_cmdv),
# frequency = 12,
# start = c(lvl_start_yr,lvl_start_mo))
############Set BSTS Parameters
test_range<- window(lvl_ts, start= c(lvl_start_yr,lvl_start_mo), end=c(lvl_end_yr-forecastyr, lvl_end_mo))
full_range<- window(lvl_ts, start= c(lvl_start_yr,lvl_start_mo), end=c(lvl_end_yr, lvl_end_mo))
test_end_date<-paste(year(max(reg_data$month))-forecastyr,
                     month(max(reg_data$month)),
                     "01", sep = "-")
test_start_date<-paste(lvl_start_yr, lvl_start_mo, "01", sep = "-")
###############Start BSTS with regressor
reg_train<- reg_zoo %>%
filter(month<=test_end_date & month >= test_start_date)
reg_train<- select(reg_train, -month)
reg_pred<- reg_zoo %>%
filter(month > test_end_date & month <= lvl_end_date)
reg_pred<- select(reg_pred, -month)
#ss2 <- AddLocalLinearTrend(list(), test_range)
#ss2 <- AddStudentLocalLinearTrend(list(), test_range)
#ss2 <- AddAutoAr(list(), test_range)
ss2 <- AddSemilocalLinearTrend(list(), test_range)
ss2 <- AddSeasonal(ss2, test_range, nseasons = 12)
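# State specification: a semilocal linear trend (slope follows an AR(1) process around a
# long-run mean) plus a 12-month seasonal component; the commented lines above are
# alternative trend/AR state models.
# For comparison, a regression-free baseline could be fit with the same states, e.g.:
# bsts.lvl <- bsts(test_range, state.specification = ss2, niter = 1000)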
bsts.lvl.r <- bsts(test_range ~ .,
state.specification = ss2,
data = reg_train,
niter = 1000,
expected.model.size=5)
burn2 <- SuggestBurn(.1, bsts.lvl.r)
lvl.pred.r <- predict.bsts(bsts.lvl.r,
horizon = (forecastyr*12),
newdata = reg_pred, burn = burn2,
quantiles = c( (100-conf_int)/2/100, 1-(100-conf_int)/2/100))
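# The forecast above covers forecastyr*12 months past the training window using the
# held-out regressor values; the quantiles argument converts conf_int (e.g. 95) into
# the lower/upper tail probabilities (0.025 and 0.975) of the posterior predictive
# distribution.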
d2r <- data.frame(
c(as.numeric(-colMeans(bsts.lvl.r$one.step.prediction.errors[-(1:burn2),])+test_range),
as.numeric(lvl.pred.r$mean)),
as.numeric(full_range),
as.Date(time(full_range)))
names(d2r) <-c("Fitted", "Actual", "Date")
MAPEr <- filter(d2r, year(Date)>lvl_end_yr-forecastyr) %>%
summarize(MAPEr= mean(abs(Actual-Fitted)/Actual))
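# MAPEr is the mean absolute percentage error of the forecast over the holdout years
# (the last forecastyr years of record); note that MAPE can be unstable when Actual is
# near zero, e.g. for negated depth-to-water series.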
posterior.interval.r <- cbind.data.frame(
as.numeric(lvl.pred.r$interval[1,]),
as.numeric(lvl.pred.r$interval[2,]),
subset(d2r, Date > test_end_date)$Date)
names(posterior.interval.r ) <- c("LL", "UL", "Date")
d3r<-left_join(d2r, posterior.interval.r , by = "Date")
credible.interval.r <- cbind.data.frame(
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){quantile(f,(50+cred_int/2)/100)})),
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){median(f)})),
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){quantile(f,(50-cred_int/2)/100)})),
subset(d3r, Date > test_end_date)$Date)
names(credible.interval.r ) <- c("p75", "Median", "p25", "Date")
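# The column names p75/p25 reflect the default cred_int = 50 (the 75th and 25th
# percentiles of the posterior predictive distribution); with a different cred_int
# they hold the corresponding upper/lower percentiles.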
credible.interval.r %>%
ggplot(aes(x=Date))+
geom_line(aes(y=p75, color = "p75"))+
geom_line(aes(y=p25, color = "p25"))+
geom_line(aes(y=Median, color = "Median"))
d4r<-left_join(d3r, credible.interval.r , by = "Date")
plot(lvl.pred.r)
plot(bsts.lvl.r)
plot(bsts.lvl.r, "components", same.scale = FALSE)
plot(bsts.lvl.r, "coef")
ggplot(data=d3r, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("CMDV MAPE = ",
round(100*MAPEr,2), "% ---- Confidence Interval = ",
conf_int,
"%\nPredicted Water Levels at ",
test_station_name)) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))+
scale_x_date(limits = c(as.Date(paste(lvl_end_yr-20, lvl_start_mo, "01", sep = "-")),
                        lvl_end_date),
             date_breaks = "1 year")
ggplot(data=d4r, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_ribbon(aes(ymin=p25, ymax=p75), fill="grey", alpha=0.5) +
ggtitle(paste0("CMDV MAPE = ",
round(100*MAPEr,2), "% ---- Credible Interval = ",
cred_int,
"%\nPredicted Water Levels at ",
test_station_name)) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))+
scale_x_date(limits = c(as.Date(paste(lvl_end_yr-20, lvl_start_mo, "01", sep = "-")),
                        lvl_end_date),
             date_breaks = "1 year")
# Quick linear check of the regressors against the (interpolated) monthly levels;
# assumes predictor == TRUE so reg_zoo contains ppt_cmdv, lvl_reg, and ppt_obs
lvl_lm<-lm(lvl_zoo$obs~reg_zoo$ppt_cmdv+reg_zoo$lvl_reg+reg_zoo$ppt_obs)
summary(lvl_lm)