Skip to content

Instantly share code, notes, and snippets.

@Gedevan-Aleksizde
Last active October 18, 2019 05:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gedevan-Aleksizde/9d994625ef9d3f2d843567d8af6a4bae to your computer and use it in GitHub Desktop.
Save Gedevan-Aleksizde/9d994625ef9d3f2d843567d8af6a4bae to your computer and use it in GitHub Desktop.
require(KFAS) # 1.2.9
require(dplyr)
require(tidyr)
require(ggplot2)
require(data.table)
require(zoo)
##### read dataset #####
# https://catalog.data.gov/dataset/allegheny-county-crash-data
# data description
df.crash <- read.csv(file="data/crashdatadictionary.csv",
stringsAsFactors = F, fileEncoding = "utf-8")
df <- list()
for (i in 2004:2014 - 2003){
df[[i]] <- read.csv(file=paste0("data/", i + 2003, "alcocrash.csv"),
stringsAsFactors = F, fileEncoding = "utf-8")
}
df[[length(df) + 1]] <- read.csv(file="data/d90eb4fd-1234-4f3b-ba3d-422769cd3761.csv",
stringsAsFactors = F, fileEncoding = "utf-8")
df[[length(df) + 1]] <- read.csv(file="data/reordered2016crashes.csv", stringsAsFactors = F, fileEncoding = "utf-8")
df <- rbindlist(df, fill = T)
df.mon <- filter(df, COLLISION_TYPE != 0, BICYCLE == 0) %>% group_by(CRASH_YEAR, CRASH_MONTH) %>%
summarise(CRASH=n(),
AGGRESSIVE=sum(AGGRESSIVE_DRIVING),
ALCOHOL=sum(ALCOHOL_RELATED),
PHONE=sum(CELL_PHONE),
CROSS_MEDIAN=sum(CROSS_MEDIAN),
DEER=sum(DEER_RELATED),
DISTRACTED=sum(DISTRACTED),
DRIVER_U_17YR=sum(DRIVER_16YR + DRIVER_17YR >= 1),
DRIVER_75PLUS=sum(DRIVER_75PLUS),
OVERTURNED=sum(OVERTURNED),
DIRT=sum(ROAD_CONDITION==2),
WET=sum(ROAD_CONDITION %in% c(1, 4, 7)),
SNOW_ICE=sum(ROAD_CONDITION %in% c(3, 5, 6)),
WEATHER_BAD=sum(WEATHER %in% 2:8)
) %>% ungroup() %>%
mutate(ym=as.yearmon(paste(CRASH_YEAR, CRASH_MONTH, sep="-")))
##### plot #####
ggplot(gather(df.mon, key = series, value=value, -ym, -CRASH_YEAR, -CRASH_MONTH) %>% filter(series != "CRASH")) +
geom_line(aes(x=ym, y=value, group=series), color="grey") + labs(x="year-month", "count of car crash") +
facet_wrap(~series, scales = "free") + theme_classic()
ggplot(df.mon) + geom_line(aes(x=ym, y=CRASH), color="grey", lwd=1) + labs(x="year-month", y="number of crashes") +
theme_classic()
# monthly
temp <- group_by(df, CRASH_MONTH) %>% summarise(y=n()) %>% ungroup
temp$CRASH_MONTH <- as.integer(temp$CRASH_MONTH)
ggplot(temp) + geom_line(aes(x=CRASH_MONTH, y=y)) + scale_x_continuous(breaks= temp$CRASH_MONTH)
##### modeling #####
# Gaussian case
model1 <- SSModel(formula = CRASH ~ SSMtrend(1, Q=NA) +
SSMseasonal(period = 12, Q=NA),
data=df.mon, H = NA)
model2 <- SSModel(formula = CRASH ~ SSMtrend(1, Q=NA) +
SSMseasonal(period = 12, Q=NA) +
DRIVER_U_17YR + DRIVER_75PLUS + DISTRACTED + PHONE + SNOW_ICE + WEATHER_BAD + WET,
data=df.mon, H = NA)
# Poisson case
model3 <- SSModel(formula = CRASH ~ SSMtrend(1, Q=NA) +
SSMseasonal(period = 12, Q=NA),
distribution = "poisson", data=df.mon)
model4 <- SSModel(formula = CRASH ~ SSMtrend(1, Q=NA) +
SSMseasonal(period = 12, Q=NA) +
DRIVER_U_17YR + DRIVER_75PLUS + DISTRACTED + PHONE + SNOW_ICE + WEATHER_BAD + WET,
distribution = "poisson", data=df.mon)
fit <- function(model, inits, method="BFGS"){
f <- fitSSM(model, inits=inits, method=method)
print(paste("Converged: ", f$optim.out$convergence == 0))
return(f)
}
fit.model1 <- fit(model1, inits=c(1, 1, 1))
fit.model2 <- fit(model2, inits=c(1, 1, 1))
fit.model3 <- fit(model3, inits=c(1, 1, 1))
fit.model4 <- fit(model4, inits=c(.1, .1), "SANN")
# filtering
filter.model1 <- KFS(fit.model1$model, smoothing = c("state", "mean"))
filter.model2 <- KFS(fit.model2$model, smoothing = c("state", "mean"))
filter.model3 <- KFS(fit.model3$model, smoothing = c("state", "mean"))
filter.model4 <- KFS(fit.model4$model, smoothing = c("state", "mean"))
plot.error <- function(list.filter.model, t=NULL){
df <- data.frame(y=list.filter.model[[1]]$model$y,
lapply(list.filter.model, FUN = function(x){return(
if(x$model$distribution == "gaussian"){
cumsum(abs(x$v))
}
else{
cumsum(abs(x$muhat-x$model$y))
}
)}
))
if(!is.null(t)) df$t <- t
else df$t <- 1:nrow(df)
df <- gather(df, key = series, value=value, -t)
df$err.or.y <- ifelse(df$series == "y", "raw data", "cumulative abs. erros")
print(ggplot(df) +
geom_line(aes(x=t, y=value, group=series, color=series)) +
facet_wrap(~err.or.y, scales = "free_y", ncol=1) +
labs(x="", y="") +
theme_classic(legend.position="top"))
}
plot.error(list("model 1"=filter.model1, "model 2"=filter.model2,
"model 3"=filter.model3, "model 4"=filter.model4), t = df.mon$ym)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment