Last active
November 27, 2017 13:13
-
-
Save salvelinusbob/26b94b531527742a7f108541a650a02e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(plyr) | |
require(stringr) | |
require(dataRetrieval) | |
require(geoknife) | |
library(zoo) | |
library(imputeTS) | |
library(lubridate) | |
library(bsts) | |
library(dplyr) | |
library(ggplot2) | |
library(readr) | |
######Enter site numbers to be evaluated | |
forecast_sites <-c("441833089315601") | |
predictor<-TRUE | |
#predictor<-FALSE | |
predictor_sites <-c("444709089265301") | |
conf_int<-95 | |
cred_int<-50 | |
forecastyr<-3 | |
if(predictor == TRUE){site_no<-c(forecast_sites, predictor_sites)} else {site_no<-c(forecast_sites)} | |
#####Get data for site numbers data_avail<-whatNWISdata(siteNumber = | |
data_avail<-whatNWISdata(siteNumber = c(site_no), parameterCd = c("72019", "00060", "00065")) | |
site_desc<-readNWISsite(c(site_no)) | |
if(any(data_avail$data_type_cd =="gw")){gw_data<-readNWISgwl(site_no, convertType = FALSE)[,c(4, 7, 2)]} | |
if(exists("gw_data")){gw_data$lev_dt<-as.Date(gw_data$lev_dt, "%Y-%m-%d")} | |
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="72019")){dv_gw_data<-readNWISdv(site_no, parameterCd = "72019" , statCd = "00001")[,c(3, 4, 2)]} | |
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="00060")){dv_cfs_data<-readNWISdv(site_no, parameterCd = "00060" , statCd = "00003")[,c(3, 4, 2)]} | |
if(any(data_avail$data_type_cd =="dv" & data_avail$parm_cd=="00065")){dv_lvl_data<-readNWISdv(site_no, parameterCd = "00065" , statCd = "00003")[,c(3, 4, 2)]} | |
if(exists("gw_data")) {colnames(gw_data)<-c("Date", "OBS", "site_no")} | |
if(exists("dv_gw_data")) {colnames(dv_gw_data)<-c("Date", "OBS", "site_no")} | |
if(exists("dv_cfs_data")) {colnames(dv_cfs_data)<-c("Date", "OBS", "site_no")} | |
if(exists("dv_lvl_data")) {colnames(dv_lvl_data)<-c("Date", "OBS", "site_no")} | |
if(exists("gw_data") & exists("dv_gw_data")) {gwl_data<-rbind(gw_data, dv_gw_data)} | |
if(exists("gw_data") & exists("dv_gw_data")==FALSE) {gwl_data<-gw_data} | |
if(exists("gw_data")==FALSE & exists("dv_gw_data")) {gwl_data<-dv_gw_data} | |
if(exists("gw_data")) {colnames(gwl_data)<-c("Date", "OBS", "site_no")} | |
if(exists("gwl_data")){gwl_data$OBS<-as.numeric(gwl_data$OBS)} | |
if(exists("gwl_data")){gwl_data$OBS<-gwl_data$OBS*(-1)} | |
if(exists("gwl_data")){gwl_data$Unit<-"Ft_BLS"} | |
if(exists("gwl_data")){gwl_data<-join(gwl_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]} | |
if(exists("gwl_data")){gwl_data$Wat_elev<-gwl_data$alt_va+gwl_data$OBS} | |
if(exists("dv_cfs_data")){dv_cfs_data$Unit<-"CFS"} | |
if(exists("dv_cfs_data")){dv_cfs_data<-join(dv_cfs_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]} | |
if(exists("dv_cfs_data")){dv_cfs_data$Wat_elev<-NA} | |
if(exists("dv_lvl_data")){dv_lvl_data$Unit<-"Elev_Ft"} | |
if(exists("dv_lvl_data")){dv_lvl_data<-join(dv_lvl_data, site_desc, by= "site_no")[, c(1,2,3,4,6,7,10,11,23)]} | |
if(exists("dv_lvl_data")){dv_lvl_data$Wat_elev<-dv_lvl_data$alt_va+dv_lvl_data$OBS} | |
if(exists("dv_cfs_data")) {dv_cfs_data$OBS<-log(dv_cfs_data$OBS)} | |
lvl_obs<-rbind(if(exists("gwl_data"))gwl_data, if(exists("dv_cfs_data"))dv_cfs_data, if(exists("dv_lvl_data"))dv_lvl_data) | |
lvl_obs$DateYM<-str_sub(lvl_obs$Date, 1, 7) | |
if(any (lvl_obs$OBS=="-999999")) lvl_obs$OBS[lvl_obs$OBS=="-999999"]<-NA | |
#####Combine level data into one monthly table | |
obs_data_monthly<-aggregate(lvl_obs$OBS ~ lvl_obs$DateYM +lvl_obs$site_no+lvl_obs$Unit , lvl_obs, mean) | |
colnames(obs_data_monthly)<-c("DateYM", "site_no","Unit", "OBS") | |
obs_data_monthly$DateYMD<-as.Date(paste(obs_data_monthly$DateYM, "-01", sep="")) | |
obs_data_monthly<-as.data.frame(obs_data_monthly%>% | |
group_by(site_no)%>% | |
mutate(z_score=(OBS-mean(OBS))/sd(OBS))) | |
#####Create data for potential stencils derived from site data | |
mean_site_long<-as.numeric(site_desc %>% filter(site_no == forecast_sites) %>% summarize(mean(dec_long_va))) | |
mean_site_lat<- as.numeric(site_desc %>% filter(site_no == forecast_sites) %>% summarize(mean(dec_lat_va))) | |
huc8_stencil<-paste("HUC8::", paste(site_desc$huc_cd, collapse = ', ')) | |
#####Get monthly PPT data for selected stencil - comment out unused stencils | |
#stencil<-webgeom("HUC8:: 07070002") #selected huc8 | |
#stencil<-webgeom(huc8_stencil) #returns the average for all huc8s in the site descriptions | |
stencil<-simplegeom(c(mean_site_long, mean_site_lat)) #takes the mean of the lat/longs selected | |
#stencil<-simplegeom(c(-89.8, 44.7)) | |
#stencil<-webgeom("state::Wisconsin") | |
ppt_job<-geoknife(stencil, (list(times = as.POSIXct(c('1895-01-01','2018-01-01')),url = 'http://cida.usgs.gov/thredds/dodsC/prism_v2', variables = 'ppt')), wait = TRUE) | |
ppt_data = result(ppt_job)[,c(1,2)] | |
colnames(ppt_data)<-c("Date", "PPT_MM") | |
ppt_data$Date<-as.Date(as.POSIXct(ppt_data$Date)) | |
#####calculate and add cumulative deviation from 10yr mean variables | |
ppt_data$mean10yr<- rollmean(ppt_data$PPT_MM, k=96, align = "right", fill = NA, na.rm=TRUE) | |
ppt_data$mo_dev10<-(ppt_data$PPT_MM-ppt_data$mean10yr) | |
ppt_data$mo_dev10[is.na(ppt_data$mo_dev10)]<-0 | |
ppt_data$sum_dev10<-cumsum(ppt_data$mo_dev10) | |
ppt_data$sum_dev10_Z<-((ppt_data$sum_dev10-mean(ppt_data$sum_dev10))/sd(ppt_data$sum_dev10)) | |
ppt_data$site<-"Precip" | |
ppt_data$data$Unit<-"MM" | |
#####************ | |
#ppt_data <- read_csv("~/DNR/Research/Water Levels/ppt_data.csv") | |
#obs_data_monthly <- read_csv("~/DNR/Research/Water Levels/obs_data_monthly.csv") | |
Climate_indices <- read_csv("~/Documents/Water Level Data/Climate_indices.csv") | |
Climate_indices$month<-as.Date(paste(Climate_indices$yyyy, Climate_indices$mm, "01", sep = "-")) | |
test_station_name<-subset(site_desc, site_no==forecast_sites, select=station_nm) | |
LVL_BSTS <- obs_data_monthly %>% | |
filter(site_no==forecast_sites) %>% | |
mutate(yr = year(DateYMD)) %>% | |
group_by(yr) %>% | |
filter(DateYMD >= min(obs_data_monthly$DateYMD[month(obs_data_monthly$DateYMD)==1])) %>% | |
select(month = DateYMD, obs = OBS, z_score, yr) | |
minfullyr_obs<-min(LVL_BSTS$month[month(LVL_BSTS$month)==1]) | |
reg_data <- ppt_data %>% | |
filter(Date >= minfullyr_obs) %>% | |
select(month = Date, obs = PPT_MM, cmdv = sum_dev10) | |
if(predictor == TRUE){reg_data<- left_join(reg_data, | |
obs_data_monthly%>% | |
filter(site_no == predictor_sites) %>% | |
select(month=DateYMD, obs = OBS), | |
by ="month" )} | |
if(predictor == TRUE){colnames(reg_data)<-(c("month", "ppt_obs", "ppt_cmdv", "lvl_reg"))} | |
Climate_indices<-Climate_indices %>% select(-yyyy, -mm) | |
reg_data<-left_join(reg_data, Climate_indices, by = "month") | |
ppt_start<-as.numeric(min(year(reg_data$month))) | |
lvl_start_yr<-as.numeric(year(minfullyr_obs)) | |
lvl_start_mo<-1 | |
lvl_end_date<-as.Date(max(reg_data$month)) | |
lvl_end_yr<-as.numeric(year(max(reg_data$month))) | |
lvl_end_mo<-as.numeric(month(max(reg_data$month))) | |
reg_zoo<-zoo(reg_data$ppt_cmdv, reg_data$month) | |
##########Interpolate missing level data and convert to time series | |
lvl_zoo<- zoo(LVL_BSTS$obs, LVL_BSTS$month) | |
date_range<- seq.Date( | |
as.Date(paste(year(min(LVL_BSTS$month)),"-",sep = "",paste(month(min(LVL_BSTS$month)), "-01", sep = ""))), | |
as.Date(paste(year(max(LVL_BSTS$month)),"-",sep = "",paste(month(max(LVL_BSTS$month)), "-01", sep = ""))), | |
"month") | |
date_range<-data.frame(date_range) | |
colnames(date_range)<-c("month") | |
lvl_zoo <- merge( | |
x=date_range, | |
y=LVL_BSTS, | |
all.x=TRUE) | |
lvl_zoo <- na.interpolation(lvl_zoo, option = "linear") | |
lvl_ts<- ts(select(lvl_zoo, obs), | |
frequency = 12, | |
start = c(lvl_start_yr,lvl_start_mo)) | |
reg_zoo<-merge( | |
x=date_range, | |
y=reg_data, | |
all.x=TRUE) | |
reg_zoo <- na.interpolation(reg_zoo, option = "linear") | |
# ppt_ts<- ts(select(reg_zoo, ppt_cmdv), | |
# frequency = 12, | |
# start = c(lvl_start_yr,lvl_start_mo)) | |
############Set BSTS Parameters | |
test_range<- window(lvl_ts, start= c(lvl_start_yr,lvl_start_mo), end=c(lvl_end_yr-forecastyr, lvl_end_mo)) | |
full_range<- window(lvl_ts, start= c(lvl_start_yr,lvl_start_mo), end=c(lvl_end_yr, lvl_end_mo)) | |
test_end_date<-paste(year(max(reg_data$month))-forecastyr, | |
"-",sep = "", | |
paste(month(max(reg_data$month)), | |
"-01", sep = "")) | |
test_start_date<-paste(lvl_start_yr, | |
"-",(lvl_start_mo), | |
"-01", sep = "") | |
###############Start BSTS with regressor | |
reg_train<- reg_zoo %>% | |
filter(month<=test_end_date & month >= test_start_date) | |
reg_train<- select(reg_train, -month) | |
reg_pred<- reg_zoo %>% | |
filter(month > test_end_date &month<=lvl_end_date) | |
reg_pred<- select(reg_pred, -month) | |
#ss2 <- AddLocalLinearTrend(list(), test_range) | |
#ss2 <- AddStudentLocalLinearTrend(list(), test_range) | |
#ss2 <- AddAutoAr(list(), test_range) | |
ss2 <- AddSemilocalLinearTrend(list(), test_range) | |
ss2 <- AddSeasonal(ss2, test_range, nseasons = 12) | |
bsts.lvl.r <- bsts(test_range ~ ., | |
state.specification = ss2, | |
data = reg_train, | |
niter = 1000, | |
expected.model.size=5) | |
burn2 <- SuggestBurn(.1, bsts.lvl.r) | |
lvl.pred.r <- predict.bsts(bsts.lvl.r, | |
horizon = (forecastyr*12), | |
newdata = reg_pred, burn = burn2, | |
quantiles = c( (100-conf_int)/2/100, 1-(100-conf_int)/2/100)) | |
d2r <- data.frame( | |
c(as.numeric(-colMeans(bsts.lvl.r$one.step.prediction.errors[-(1:burn2),])+test_range), | |
as.numeric(lvl.pred.r$mean)), | |
as.numeric(full_range), | |
as.Date(time(full_range))) | |
names(d2r) <-c("Fitted", "Actual", "Date") | |
MAPEr <- filter(d2r, year(Date)>lvl_end_yr-forecastyr) %>% | |
summarize(MAPEr= mean(abs(Actual-Fitted)/Actual)) | |
posterior.interval.r <- cbind.data.frame( | |
as.numeric(lvl.pred.r$interval[1,]), | |
as.numeric(lvl.pred.r$interval[2,]), | |
subset(d2r, Date>paste(year(max(reg_data$month))-forecastyr, | |
"-",sep = "", | |
paste(month(max(reg_data$month)), | |
"-01", sep = "")))$Date) | |
names(posterior.interval.r ) <- c("LL", "UL", "Date") | |
d3r<-left_join(d2r, posterior.interval.r , by = "Date") | |
credible.interval.r <- cbind.data.frame( | |
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){quantile(f,(50+cred_int/2)/100)})), | |
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){median(f)})), | |
as.numeric(apply(lvl.pred.r$distribution, 2,function(f){quantile(f,(50-cred_int/2)/100)})), | |
subset(d3r, Date>paste(year(max(reg_data$month))-forecastyr, | |
"-",sep = "", | |
paste(month(max(reg_data$month)), | |
"-01", sep = "")))$Date) | |
names(credible.interval.r ) <- c("p75", "Median", "p25", "Date") | |
credible.interval.r %>% | |
ggplot(aes(x=Date))+ | |
geom_line(aes(y=p75, color = "p75"))+ | |
geom_line(aes(y=p25, color = "p25"))+ | |
geom_line(aes(y=Median, color = "Median")) | |
d4r<-left_join(d3r, credible.interval.r , by = "Date") | |
plot(lvl.pred.r) | |
plot(bsts.lvl.r) | |
plot(bsts.lvl.r, "components", same.scale = FALSE) | |
plot(bsts.lvl.r, "coef") | |
ggplot(data=d3r, aes(x=Date)) + | |
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) + | |
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) + | |
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") + | |
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) + | |
ggtitle(paste0("CMDV MAPE = ", | |
round(100*MAPEr,2), "% ---- Confidence Interval = ", | |
conf_int, | |
"%\nPredicted Water Levels at ", | |
test_station_name)) + | |
theme(axis.text.x=element_text(angle = -90, hjust = 0))+ | |
scale_x_date(limits = as.Date(c(paste(lvl_end_yr-20, | |
"-",(lvl_start_mo), | |
"-01", sep = ""), lvl_end_date), | |
date_breaks = "1 years")) | |
ggplot(data=d4r, aes(x=Date)) + | |
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) + | |
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) + | |
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") + | |
geom_ribbon(aes(ymin=p25, ymax=p75), fill="grey", alpha=0.5) + | |
ggtitle(paste0("CMDV MAPE = ", | |
round(100*MAPEr,2), "% ---- Credible Interval = ", | |
cred_int, | |
"%\nPredicted Water Levels at ", | |
test_station_name)) + | |
theme(axis.text.x=element_text(angle = -90, hjust = 0))+ | |
scale_x_date(limits = as.Date(c(paste(lvl_end_yr-20, | |
"-",(lvl_start_mo), | |
"-01", sep = ""), lvl_end_date), | |
date_breaks = "1 years")) | |
lvl_lm<-lm(lvl_zoo$obs~reg_zoo$ppt_cmdv+reg_zoo$lvl_reg+reg_zoo$ppt_obs) | |
summary(lvl_lm) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment