This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace title_year with the number of years between it and 2017 (the
# reference year is hard-coded), then drop the raw year column.
num_df <- num_df %>%
  mutate(years_since_release = 2017 - title_year) %>%
  select(-title_year)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-year mean of imdb_score with an empirical band spanning the 2.5th to
# 97.5th percentile of the scores within each title_year.
df %>%
  group_by(title_year) %>%
  summarise(
    mean_rating = mean(imdb_score),
    upper_rating = quantile(imdb_score, 0.975),
    # 0.025 mirrors the 0.975 upper bound; 0.0275 made the band asymmetric.
    lower_rating = quantile(imdb_score, 0.025)
  ) %>%
  ggplot(aes(title_year, mean_rating)) +
  geom_line(colour = "dodger blue") +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
  # ymin takes the lower bound and ymax the upper bound.
  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating)) +
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Long-format table of the pairwise correlations computed from num_df, with
# each pair labelled by whether its absolute correlation exceeds 0.50.
correlations <- corrr::correlate(num_df) %>%
  # NOTE(review): 2:16 selects the gathered columns by position; confirm it
  # still covers every variable column if num_df gains or loses columns.
  gather(variable, correlation, 2:16) %>%
  select(rowname, variable, correlation) %>%
  mutate(
    high_correlation = ifelse(abs(correlation) > 0.50, "high", "not so high")
  )
correlations %>% | |
ggplot(aes(reorder(rowname, correlation), reorder(variable, correlation), fill = correlation)) + | |
geom_tile(alpha = 0.6, colour = "black") + | |
geom_text(aes(label = round(correlation, 2), colour = high_correlation)) + | |
theme_minimal() + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scale every column of num_df, then regress imdb_score on all remaining
# columns once per replicate produced by bootstrap(10). Each coefficient row
# carries its confidence interval and a flag for p.value < 0.005.
fit <- num_df %>%
  map(scale) %>%
  as.data.frame() %>%
  bootstrap(10) %>%
  do(tidy(lm(imdb_score ~ ., data = .), conf.int = TRUE)) %>%
  mutate(sig_0005 = p.value < 0.005)
fit %>% | |
filter(term != "(Intercept)") %>% | |
group_by(term) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fit %>% | |
filter(term != "(Intercept)") %>% | |
group_by(term) %>% | |
summarise(estimate = mean(estimate), | |
conf.low = mean(conf.low), | |
conf.high = mean(conf.high), | |
sig_0005 = ifelse(sum(sig_0005) > 6, "significant (< 0.005)", "not significant")) %>% | |
ggplot(aes(y = reorder(term, estimate), x = estimate, colour = sig_0005)) + | |
geom_point() + | |
geom_errorbarh(aes(xmax = conf.high, xmin = conf.low)) + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used by the movie-rating analysis.
library(tidyverse)
library(here)
library(broom)
library(corrr)
library(forcats)
library(stringr)
library(lubridate)
library(gridExtra)

# Read the movie metadata; the relative path is resolved against the
# current working directory.
df <- read_csv("movie_metadata.csv")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr)
library(ggplot2)
# Simulation constants.
n <- 200 # number of observations
bias <- 4
slope <- 3.5

dot <- `%*%` # defined for personal preference

# Draw n standard-normal values scaled by 2, then bind a column of ones as
# the second column of the design matrix.
x <- rnorm(n) * 2
x_b <- cbind(x, rep(1, n))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for (iteration in seq_len(n_iterations)) { | |
yhat <- dot(x_b, theta) # predict using weights in theta | |
residuals_b <- yhat - y # calculate the residuals | |
gradients <- 2/n * dot(t(x_b), residuals_b) # calculate the gradients of MSE w.r.t model weights | |
theta <- theta - learning_rate * gradients # update theta | |
sse_i[[iteration]] <- sum((y - dot(x_b, theta))**2) | |
b0[[iteration]] <- theta[2] | |
b1[[iteration]] <- theta[1] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
p1 <- df %>% | |
ggplot(aes(x=x, y=y)) + | |
geom_abline(aes(intercept = b0, | |
slope = b1, | |
colour = -sse, | |
frame = model_iter), | |
data = model_i, | |
alpha = .50 | |
) + | |
geom_point(alpha = 0.4) + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict from a weight vector by appending a column of ones to `x` and
# multiplying by the weights.
#
# x:       numeric vector, or matrix with one row per observation.
# weights: coefficients ordered like the columns of cbind(x, 1), so the
#          intercept weight comes last. Defaults to the `theta` visible
#          from the function's enclosing environment, which keeps existing
#          one-argument calls working unchanged.
#
# Returns the matrix product computed by `dot` (one row per observation).
# `dot` must be defined before the function is called.
predict_from_theta <- function(x, weights = theta) {
  # NROW() counts observations for both plain vectors and matrices, so the
  # ones column always has one entry per row of `x`.
  design <- cbind(x, rep(1, NROW(x)))
  dot(design, weights)
}
predict_from_theta(rnorm(10)) | |
[,1] | |
[1,] -1.530065 |