Created
December 8, 2015 21:39
-
-
Save jebyrnes/f2df17f15c58e2483459 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(nlme)
library(ggplot2)
library(lubridate)
library(dplyr)

# Load the Google Trends export for the phrase "i cant even".
# Download the data from
# https://www.google.com/trends/explore#q=%22i%20cant%20even%22&cmpt=q&tz=Etc%2FGMT%2B5
# The first four lines of the file are skipped before the header row is read.
i_cant_even <- read.csv("./i_cant_even.csv", skip = 4)

# Reformat weeks: keep only the first date of each "start - end" range.
i_cant_even$Week <- as.character(i_cant_even$Week)
i_cant_even$Week <- gsub(" - \\d\\d\\d\\d-\\d\\d-\\d\\d", "", i_cant_even$Week)

# Get rid of the extra info at the end of the file: everything from the first
# row whose Week field contains a lowercase letter onwards is dropped.
# The POSIX class must be written inside a bracket expression ("[[:lower:]]");
# a bare "[:lower:]" only matches the literal characters ':', 'l', 'o', 'w',
# 'e' and 'r'.
strStart <- grep("[[:lower:]]", i_cant_even$Week)[1]
if (!is.na(strStart)) {
  # Keep only the rows before the first text row. When no text row exists
  # the data are left untouched instead of failing on an NA index.
  i_cant_even <- i_cant_even[seq_len(strStart - 1), ]
}

# Get rid of the last week, which holds incomplete data.
i_cant_even <- i_cant_even[-nrow(i_cant_even), ]

# Make a real date.
i_cant_even$Date <- ymd(i_cant_even$Week)

# Make the class of the y-axis variable correct. Going through as.character()
# first matters when read.csv() returned the column as a factor (it also held
# the trailing text rows): as.numeric() on a factor yields the internal level
# codes, not the recorded values.
i_cant_even$i.cant.even <- as.numeric(as.character(i_cant_even$i.cant.even))

# Previous week's value, used as the autoregressive predictor. dplyr::lag() is
# named explicitly because stats::lag() has different semantics and which one
# `lag` resolves to depends on package load order.
i_cant_even$lag.i.cant.even <- dplyr::lag(i_cant_even$i.cant.even)
# Quick exploratory plot: the observed series drawn as a red line over time.
ggplot(i_cant_even, aes(x = Date, y = i.cant.even)) +
  geom_line(colour = "red") +
  theme_minimal()
####################
# Models can't even
####################

# Linear mixed model: the response is regressed on Date and on the previous
# observation (lag.i.cant.even), with a random intercept for each level of
# as.factor(Date). Rows with missing values (e.g. the first row, whose lag is
# NA) are handled by na.exclude, so predictions stay aligned with the data.
# NOTE(review): Date looks unique per row, which would give the random
# intercept one level per observation -- confirm this is intended.
i_cant_even_model <- lme(
  i.cant.even ~ Date + lag.i.cant.even,
  random = ~ 1 | as.factor(Date),
  data = i_cant_even,
  na.action = na.exclude
)

# Population-level (fixed effects only) predictions for the observed rows.
a <- predict(i_cant_even_model, level = 0)

# Plot predicted v. observed as two lines against row index.
matplot(
  cbind(a, i_cant_even$i.cant.even),
  type = "l",
  xlab = "Time",
  ylab = "How many can't even"
)
####################
# Forecast, using autoregressive
####################

# Set up the prediction data frame: 1000 weekly steps following the last
# observed date. Both value columns start as missing and are filled in one
# step at a time, because each step's predictor is the previous step's value.
future_cant_even <- data.frame(
  Date = max(i_cant_even$Date) + days(7) * 1:1000,
  lag.i.cant.even = NA_real_,
  i.cant.even = NA_real_
)

# Seed the recursion: the first future step lags onto the last observed value
# and takes the fixed-effects prediction as is (no noise added).
future_cant_even$lag.i.cant.even[1] <-
  i_cant_even$i.cant.even[nrow(i_cant_even)]
future_cant_even$i.cant.even[1] <- predict(
  i_cant_even_model,
  newdata = future_cant_even[1, ],
  level = 0
)

# Residual standard deviation of the fitted model. rnorm() takes a standard
# deviation as its third argument, so sigma is used directly; squaring it
# would pass a variance and mis-scale the simulated noise.
resid_sd <- i_cant_even_model$sigma

# seq_len(...)[-1] is 2, 3, ..., n and is empty (rather than c(2, 1)) should
# the frame ever have a single row.
for (i in seq_len(nrow(future_cant_even))[-1]) {
  # Add a lag: the previous simulated value becomes this step's predictor.
  future_cant_even$lag.i.cant.even[i] <- future_cant_even$i.cant.even[i - 1]

  # Make a prediction from the fixed effects only.
  future_cant_even$i.cant.even[i] <- predict(
    i_cant_even_model,
    newdata = future_cant_even[i, ],
    level = 0
  )

  # Add some variability: one draw centred on the prediction.
  future_cant_even$i.cant.even[i] <- rnorm(
    1,
    mean = future_cant_even$i.cant.even[i],
    sd = resid_sd
  )
}
# Label each data set so the two series can be told apart in the plot.
i_cant_even$Type <- "Observed"
future_cant_even$Type <- "Predicted"

# Stack observed and forecast rows. bind_rows() fills columns that exist in
# only one of the inputs (e.g. Week) with NA, and comes from dplyr, which this
# script already loads -- no additional package is needed.
all_cant_even <- dplyr::bind_rows(i_cant_even, future_cant_even)

# Make a plot for funsies: one red line, with the line type distinguishing
# observed from predicted values.
ggplot(all_cant_even, aes(x = Date, y = i.cant.even, linetype = Type)) +
  geom_line(colour = "red") +
  theme_minimal()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment