# jebyrnes/i_cant_even.R

## i_cant_even.R
library(nlme)
library(ggplot2)
library(lubridate)
library(dplyr)

# Download data from https://www.google.com/trends/explore#q=%22i%20cant%20even%22&cmpt=q&tz=Etc%2FGMT%2B5
# and save it as ./i_cant_even.csv. The first four lines of the export are
# skipped (skip = 4).
i_cant_even <- read.csv("./i_cant_even.csv", skip = 4)

# Reformat weeks: strip the " - YYYY-MM-DD" suffix so that only the first date
# of each week range is kept.
i_cant_even$Week <- as.character(i_cant_even$Week)
i_cant_even$Week <- gsub(" - \\d{4}-\\d{2}-\\d{2}", "", i_cant_even$Week)

# Get rid of the extra info at the end: drop everything from the first row
# whose Week value contains a lowercase letter. The POSIX class has to sit
# inside a bracket expression ("[[:lower:]]"); a bare "[:lower:]" is only the
# character set ':', 'l', 'o', 'w', 'e', 'r'.
strStart <- grep("[[:lower:]]", i_cant_even$Week)[1]
if (!is.na(strStart)) {
  # Only truncate when such a row exists; NA:nrow(...) would otherwise error.
  i_cant_even <- i_cant_even[seq_len(strStart - 1), , drop = FALSE]
}

# Get rid of the last week of incomplete data.
i_cant_even <- head(i_cant_even, -1)

# Make a real date.
i_cant_even$Date <- ymd(i_cant_even$Week)

# Make the class on the y axis correct. Going through as.character() keeps the
# printed values even when the column was read as a factor; as.numeric() on a
# factor would return the internal level codes instead.
i_cant_even$i.cant.even <- as.numeric(as.character(i_cant_even$i.cant.even))

# Previous week's value, used as the autoregressive predictor. The explicit
# namespace makes sure the vector-shifting dplyr::lag is used rather than
# stats::lag, regardless of package attach order.
i_cant_even$lag.i.cant.even <- dplyr::lag(i_cant_even$i.cant.even)

# Quick look at the observed series: value over time as a red line.
ggplot(data = i_cant_even, aes(x = Date, y = i.cant.even)) +
  geom_line(colour = "red") +
  theme_minimal()

####################
# Models can't even
####################
# Mixed model of the series on Date and its own one-step lag, with a random
# intercept for each Date. Rows with missing values (e.g. the first row, whose
# lag is NA) are handled via na.exclude.
# NOTE(review): if every Date occurs only once, the random intercept has one
# level per observation -- confirm this is intended.
i_cant_even_model <- lme(
  fixed = i.cant.even ~ Date + lag.i.cant.even,
  random = ~ 1 | as.factor(Date),
  data = i_cant_even,
  na.action = na.exclude
)

# level = 0: predictions from the fixed effects only.
a <- predict(i_cant_even_model, level = 0)

# Plot predicted v. observed as two lines against row index.
matplot(
  cbind(a, i_cant_even$i.cant.even),
  type = "l",
  xlab = "Time",
  ylab = "How many can't even"
)

####################
# Forecast, using autoregressive
####################
# Number of weekly steps to simulate beyond the last observed week.
n_future_weeks <- 1000

# Set up the prediction data frame: one row per future week, seven days apart.
# NA_real_ keeps both value columns numeric from the start.
future_cant_even <- data.frame(
  Date = max(i_cant_even$Date) + days(7) * seq_len(n_future_weeks),
  lag.i.cant.even = NA_real_,
  i.cant.even = NA_real_
)

# Seed the recursion: the first future week lags on the last observed value
# and receives the plain fixed-effects prediction (no noise added).
future_cant_even$lag.i.cant.even[1] <- i_cant_even$i.cant.even[nrow(i_cant_even)]
future_cant_even$i.cant.even[1] <- predict(
  i_cant_even_model,
  newdata = future_cant_even[1, ],
  level = 0
)

# Every later week is simulated from the previous simulated value.
# seq_len(...)[-1] is empty for a one-row horizon, unlike 2:nrow(...).
for (i in seq_len(nrow(future_cant_even))[-1]) {
  # Add a lag.
  future_cant_even$lag.i.cant.even[i] <- future_cant_even$i.cant.even[i - 1]
  # Make a prediction.
  expected <- predict(i_cant_even_model, newdata = future_cant_even[i, ], level = 0)
  # Add some variability. rnorm() takes a standard deviation, so the model's
  # residual sigma is passed as-is rather than squared.
  future_cant_even$i.cant.even[i] <- rnorm(
    1,
    mean = expected,
    sd = i_cant_even_model$sigma
  )
}

# Label each data set by origin, then stack them; rbind.fill pads columns that
# exist in only one of the two frames with NA.
i_cant_even[["Type"]] <- "Observed"
future_cant_even[["Type"]] <- "Predicted"
all_cant_even <- plyr::rbind.fill(i_cant_even, future_cant_even)

# Observed and predicted series on one plot, told apart by line type.
ggplot(
  data = all_cant_even,
  aes(x = Date, y = i.cant.even, linetype = Type)
) +
  geom_line(colour = "red") +
  theme_minimal()
	library(nlme)
	library(ggplot2)
	library(lubridate)
	library(dplyr)

	# Download data from https://www.google.com/trends/explore#q=%22i%20cant%20even%22&cmpt=q&tz=Etc%2FGMT%2B5
	# and save it as ./i_cant_even.csv. The first four lines of the export are
	# skipped (skip = 4).
	i_cant_even <- read.csv("./i_cant_even.csv", skip = 4)

	# Reformat weeks: strip the " - YYYY-MM-DD" suffix so that only the first
	# date of each week range is kept.
	i_cant_even$Week <- as.character(i_cant_even$Week)
	i_cant_even$Week <- gsub(" - \\d{4}-\\d{2}-\\d{2}", "", i_cant_even$Week)

	# Get rid of the extra info at the end: drop everything from the first row
	# whose Week value contains a lowercase letter. The POSIX class has to sit
	# inside a bracket expression ("[[:lower:]]"); a bare "[:lower:]" is only
	# the character set ':', 'l', 'o', 'w', 'e', 'r'.
	strStart <- grep("[[:lower:]]", i_cant_even$Week)[1]
	if (!is.na(strStart)) {
	  # Only truncate when such a row exists; NA:nrow(...) would otherwise error.
	  i_cant_even <- i_cant_even[seq_len(strStart - 1), , drop = FALSE]
	}

	# Get rid of the last week of incomplete data.
	i_cant_even <- head(i_cant_even, -1)

	# Make a real date.
	i_cant_even$Date <- ymd(i_cant_even$Week)

	# Make the class on the y axis correct. Going through as.character() keeps
	# the printed values even when the column was read as a factor; as.numeric()
	# on a factor would return the internal level codes instead.
	i_cant_even$i.cant.even <- as.numeric(as.character(i_cant_even$i.cant.even))

	# Previous week's value, used as the autoregressive predictor. The explicit
	# namespace makes sure the vector-shifting dplyr::lag is used rather than
	# stats::lag, regardless of package attach order.
	i_cant_even$lag.i.cant.even <- dplyr::lag(i_cant_even$i.cant.even)

	# Quick look at the observed series: value over time as a red line.
	ggplot(data = i_cant_even, aes(x = Date, y = i.cant.even)) +
	  geom_line(colour = "red") +
	  theme_minimal()

	####################
	# Models can't even
	####################
	# Mixed model of the series on Date and its own one-step lag, with a random
	# intercept for each Date. Rows with missing values (e.g. the first row,
	# whose lag is NA) are handled via na.exclude.
	# The grouping separator in the random formula is a plain "|"; a backslash
	# in front of it is not valid R outside a string and stops the file parsing.
	# NOTE(review): if every Date occurs only once, the random intercept has one
	# level per observation -- confirm this is intended.
	i_cant_even_model <- lme(
	  i.cant.even ~ Date + lag.i.cant.even,
	  random = ~ 1 | as.factor(Date),
	  data = i_cant_even,
	  na.action = na.exclude
	)

	# level = 0: predictions from the fixed effects only.
	a <- predict(i_cant_even_model, level = 0)

	# Plot predicted v. observed as two lines against row index.
	matplot(cbind(a, i_cant_even$i.cant.even), type = "l", xlab = "Time", ylab = "How many can't even")

	####################
	# Forecast, using autoregressive
	####################
	# Number of weekly steps to simulate beyond the last observed week.
	n_future_weeks <- 1000

	# Set up the prediction data frame: one row per future week, seven days
	# apart. NA_real_ keeps both value columns numeric from the start.
	future_cant_even <- data.frame(
	  Date = max(i_cant_even$Date) + days(7) * seq_len(n_future_weeks),
	  lag.i.cant.even = NA_real_,
	  i.cant.even = NA_real_
	)

	# Seed the recursion: the first future week lags on the last observed value
	# and receives the plain fixed-effects prediction (no noise added).
	future_cant_even$lag.i.cant.even[1] <- i_cant_even$i.cant.even[nrow(i_cant_even)]
	future_cant_even$i.cant.even[1] <- predict(
	  i_cant_even_model,
	  newdata = future_cant_even[1, ],
	  level = 0
	)

	# Every later week is simulated from the previous simulated value.
	# seq_len(...)[-1] is empty for a one-row horizon, unlike 2:nrow(...).
	for (i in seq_len(nrow(future_cant_even))[-1]) {
	  # Add a lag.
	  future_cant_even$lag.i.cant.even[i] <- future_cant_even$i.cant.even[i - 1]
	  # Make a prediction.
	  expected <- predict(i_cant_even_model, newdata = future_cant_even[i, ], level = 0)
	  # Add some variability. rnorm() takes a standard deviation, so the model's
	  # residual sigma is passed as-is rather than squared.
	  future_cant_even$i.cant.even[i] <- rnorm(
	    1,
	    mean = expected,
	    sd = i_cant_even_model$sigma
	  )
	}

	# Label each data set by origin, then stack them; rbind.fill pads columns
	# that exist in only one of the two frames with NA.
	i_cant_even[["Type"]] <- "Observed"
	future_cant_even[["Type"]] <- "Predicted"
	all_cant_even <- plyr::rbind.fill(i_cant_even, future_cant_even)

	# Observed and predicted series on one plot, told apart by line type.
	ggplot(
	  data = all_cant_even,
	  aes(x = Date, y = i.cant.even, linetype = Type)
	) +
	  geom_line(colour = "red") +
	  theme_minimal()