library(readr) | |
library(dplyr) | |
library(lubridate) | |
library(ggplot2) | |
library(tidyr) | |
# Load the exported traffic data. The rest of the script reads the `date`
# and `page views` columns from it.
data <- read_csv("~/traffic_2018-Jun-20_2018-Oct-18.csv")

# English day names indexed by ISO weekday number (1 = Monday ... 7 = Sunday).
# weekdays() returns names in the session's locale, so the comparisons against
# "Wednesday" / "Monday" further down would stop matching on a non-English
# system; indexing a fixed vector keeps the labels stable.
dayNames <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)

# Tag every row with its ISO week number and English day-of-week name.
# NOTE(review): assumes `date` is parsed as a Date/date-time -- confirm.
data <- data %>%
  mutate(
    Week = lubridate::isoweek(date),
    DayOfWeek = dayNames[lubridate::wday(date, week_start = 1)]
  )
# Line chart of Wednesday page views by ISO week.
ggplot(
  filter(data, DayOfWeek == "Wednesday"),
  aes(x = Week, y = `page views`)
) +
  geom_line()
# One row per ISO week holding that week's Monday and Wednesday page views.
# pivot_wider() replaces the superseded spread(); arrange() keeps the rows
# ordered by week, as spread() returned them.
daysData <- data %>%
  select(Week, DayOfWeek, `page views`) %>%
  pivot_wider(names_from = DayOfWeek, values_from = `page views`) %>%
  arrange(Week) %>%
  select(Week, Monday, Wednesday) %>%
  # Keep only weeks where both days have a value.
  drop_na(Monday, Wednesday)
# Show each week's Monday and Wednesday page views next to their
# Wednesday-to-Monday ratio.
daysData %>%
  transmute(
    Week,
    Monday,
    Wednesday,
    RatioWedToMon = Wednesday / Monday
  )
# ISO week treated as the "current" week: it is held out of the training
# data and used as the prediction target. Defined once so the training
# filter and the prediction filter cannot drift apart.
currentWeek <- 42

# Exclude current week from data
train <- daysData %>% filter(Week != currentWeek)

# Summary statistics of the Wednesday-to-Monday ratio over the training weeks.
train %>%
  transmute(Ratio = Wednesday / Monday) %>%
  summary()

# Linear model of Wednesday page views as a function of Monday page views.
model <- lm(Wednesday ~ Monday, data = train)
summary(model)

# Row(s) of the held-out week, reused for both the prediction and the
# observed value.
currentData <- daysData %>% filter(Week == currentWeek)

# Model prediction for the held-out week's Wednesday.
expected <- predict(model, currentData)

# Observed Wednesday page views for the held-out week (kept as a one-column
# data frame, as before).
actual <- currentData %>% select(Wednesday)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment