Skip to content

Instantly share code, notes, and snippets.

# Replace the release year with the film's age: add years_since_release
# (2017 minus title_year), then drop the original title_year column.
num_df <- mutate(num_df, years_since_release = 2017 - title_year)
num_df <- select(num_df, -title_year)
# Rating trend by release year.
# For every title_year, compute the mean imdb_score together with the 2.5th
# and 97.5th percentiles (a symmetric 95% empirical band), then plot the
# yearly mean as a line with points, a linear trend line, and error bars
# spanning the band.
# NOTE(review): the original chain ended with a dangling `+`; any further
# layers (labels, theme) were not visible and are not reconstructed here.
df %>%
  group_by(title_year) %>%
  summarise(
    mean_rating = mean(imdb_score),
    upper_rating = quantile(imdb_score, 0.975),
    # 0.025 mirrors the 0.975 upper bound so the band is symmetric
    lower_rating = quantile(imdb_score, 0.025)
  ) %>%
  ggplot(aes(title_year, mean_rating)) +
  geom_line(colour = "dodger blue") +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
  # ymin takes the lower percentile and ymax the upper one
  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating))
# Long-form correlation table.
# Correlate the columns of num_df, stack columns 2 to 16 of the result into
# variable/correlation pairs, keep only the three columns needed downstream,
# and label each pair by whether |correlation| exceeds 0.50.
# NOTE(review): `2:16` presumably covers every variable column after
# `rowname` -- confirm against the actual width of num_df.
correlations <- corrr::correlate(num_df)
correlations <- gather(correlations, key = variable, value = correlation, 2:16)
correlations <- select(correlations, rowname, variable, correlation)
correlations <- mutate(
  correlations,
  high_correlation = ifelse(abs(correlation) > 0.50, "high", "not so high")
)
# Correlation heatmap.
# Draws one tile per (rowname, variable) pair, filled by the correlation
# value, with the rounded value printed on top and coloured by the
# high_correlation label. Both axes are ordered with reorder() on the
# correlation values.
# NOTE(review): the original chain ended with a dangling `+`; any further
# layers were not visible and are not reconstructed here.
correlations %>%
  ggplot(aes(
    reorder(rowname, correlation),
    reorder(variable, correlation),
    fill = correlation
  )) +
  geom_tile(alpha = 0.6, colour = "black") +
  geom_text(aes(label = round(correlation, 2), colour = high_correlation)) +
  theme_minimal()
# Bootstrapped linear models of imdb_score.
# Every column of num_df is passed through scale(), 10 bootstrap resamples
# are drawn, and within each resample imdb_score is regressed on all other
# columns. The tidied coefficient tables (with confidence intervals) are
# stacked, and each coefficient is flagged when its p-value is below 0.005.
fit <- as.data.frame(map(num_df, scale))
fit <- bootstrap(fit, 10)
fit <- do(fit, tidy(lm(imdb_score ~ ., data = .), conf.int = TRUE))
fit <- mutate(fit, sig_0005 = p.value < 0.005)
# Coefficient plot for the bootstrapped models.
# Drops the intercept, averages the estimate and confidence bounds of each
# term across resamples, and labels a term "significant (< 0.005)" when
# more than 6 of its resamples had sig_0005 set. Terms are drawn as points
# with horizontal error bars, ordered by their mean estimate.
# NOTE(review): the original chain ended with a dangling `+`; any further
# layers were not visible and are not reconstructed here.
fit %>%
  filter(term != "(Intercept)") %>%
  group_by(term) %>%
  summarise(
    estimate = mean(estimate),
    conf.low = mean(conf.low),
    conf.high = mean(conf.high),
    sig_0005 = ifelse(
      sum(sig_0005) > 6,
      "significant (< 0.005)",
      "not significant"
    )
  ) %>%
  ggplot(aes(y = reorder(term, estimate), x = estimate, colour = sig_0005)) +
  geom_point() +
  geom_errorbarh(aes(xmax = conf.high, xmin = conf.low))
library(tidyverse)
library(here)
library(broom)
library(corrr)
library(forcats)
library(stringr)
library(lubridate)
library(gridExtra)
# Load the movie metadata CSV (path is relative to the working directory).
df <- readr::read_csv(file = "movie_metadata.csv")
library(dplyr)
library(ggplot2)
# Settings for the simulated regression example.
n <- 200 # sample size
# NOTE(review): bias and slope are presumably the true intercept and slope
# used to simulate y -- the line that generates y is not visible here.
bias <- 4
slope <- 3.5

# Readable alias for matrix multiplication.
dot <- base::`%*%`

# Predictor: n standard-normal draws scaled by 2.
x <- 2 * rnorm(n)
# Two-column matrix: the predictor followed by a column of ones.
x_b <- cbind(x, rep(1, times = n))
# Batch gradient descent on the mean squared error of a linear model.
# Each pass predicts with the current weights, computes the MSE gradient,
# steps theta against it, and records the post-update sum of squared
# errors and both weights for that iteration.
# Expects n_iterations, learning_rate, theta, y, sse_i, b0 and b1 to exist
# already; their initialisation is not visible in this part of the file.
for (iteration in seq_len(n_iterations)) {
  yhat <- dot(x_b, theta) # predict using weights in theta
  residuals_b <- yhat - y # calculate the residuals
  # gradient of MSE w.r.t. the model weights
  gradients <- 2 / n * dot(t(x_b), residuals_b)
  theta <- theta - learning_rate * gradients # update theta
  sse_i[[iteration]] <- sum((y - dot(x_b, theta))^2)
  # x_b holds the predictor in column 1 and ones in column 2, so theta[2]
  # is the intercept and theta[1] the slope
  b0[[iteration]] <- theta[2]
  b1[[iteration]] <- theta[1]
}
# Scatter plot of y against x, overlaid with one line per row of model_i
# (intercept b0, slope b1), coloured by the negated sse.
# NOTE(review): the `frame` aesthetic is presumably consumed by an
# animation package to step through model_iter -- confirm which one.
# NOTE(review): the original chain ended with a dangling `+`; any further
# layers were not visible and are not reconstructed here.
p1 <- df %>%
  ggplot(aes(x = x, y = y)) +
  geom_abline(
    aes(
      intercept = b0,
      slope = b1,
      colour = -sse,
      frame = model_iter
    ),
    data = model_i,
    alpha = .50
  ) +
  geom_point(alpha = 0.4)
# Predict from the weights currently stored in `theta`.
#
# x: numeric vector of predictor values.
# Returns the matrix product of the two-column matrix (x, ones) with
# `theta`. Both `theta` and `dot` are looked up outside the function at
# call time.
predict_from_theta <- function(x) {
  with_ones <- cbind(x, rep(1, length(x)))
  dot(with_ones, theta)
}
# Example: predictions for ten fresh standard-normal inputs.
predict_from_theta(x = rnorm(10))
#>           [,1]
#> [1,] -1.530065