## Conor conormm

## create_years_since_var.r
# Swap the release year for the film's age relative to 2017: add
# `years_since_release`, then drop the original `title_year` column.
num_df <- select(
  mutate(num_df, years_since_release = 2017 - title_year),
  -title_year
)

## time_score_plt.r
# Mean IMDb score per release year, drawn as a line with points, a linear
# trend line, and error bars spanning the central 95% of scores in each year.
df %>%
  group_by(title_year) %>%
  summarise(mean_rating = mean(imdb_score),
            # Symmetric interval: 2.5% in each tail.
            upper_rating = quantile(imdb_score, 0.975),
            lower_rating = quantile(imdb_score, 0.025)) %>%
  ggplot(aes(title_year, mean_rating)) +
  geom_line(colour = "dodger blue") +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
  # ymin takes the lower bound and ymax the upper bound.
  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating))

## correlations_movie_data.r
# Pairwise correlations of the columns of num_df in long format: columns
# 2 to 16 of the correlate() output are stacked into `variable` /
# `correlation`, and each pair is flagged "high" when |correlation| > 0.5.
correlations <- num_df %>%
  corrr::correlate() %>%
  gather(key = variable, value = correlation, 2:16) %>%
  select(rowname, variable, correlation) %>%
  mutate(
    high_correlation = ifelse(abs(correlation) > 0.50, "high", "not so high")
  )

# Heat map of the pairwise correlations: one tile per (rowname, variable)
# pair, filled by correlation and labelled with the value rounded to two
# decimals. Label colour distinguishes "high" from "not so high" pairs.
correlations %>%
  ggplot(aes(reorder(rowname, correlation),
             reorder(variable, correlation),
             fill = correlation)) +
  geom_tile(alpha = 0.6, colour = "black") +
  geom_text(aes(label = round(correlation, 2), colour = high_correlation)) +
  theme_minimal()

## tidy_fit.r
# Standardise every column of num_df with scale(), draw bootstrap replicates
# via bootstrap(10), and regress imdb_score on all other columns within each
# replicate. tidy() yields one row per model term (with confidence
# intervals); `sig_0005` is TRUE where the term's p-value is below 0.005.
fit <- as.data.frame(map(num_df, scale)) %>%
  bootstrap(10) %>%
  do(tidy(lm(imdb_score ~ ., data = .), conf.int = TRUE)) %>%
  mutate(sig_0005 = p.value < 0.005)

# Coefficient rows of `fit` with the intercept removed, grouped by model
# term. The pipeline ends here so that it does not swallow the statement
# that follows it.
fit %>%
  filter(term != "(Intercept)") %>%
  group_by(term)

## fit_average_estimates.r
# Average each term's estimate and confidence bounds over the rows of `fit`
# and plot them as points with horizontal error bars, ordered by estimate.
# A term is labelled significant when more than 6 of its rows have
# `sig_0005` set.
fit %>%
  filter(term != "(Intercept)") %>%
  group_by(term) %>%
  summarise(estimate = mean(estimate),
            conf.low = mean(conf.low),
            conf.high = mean(conf.high),
            sig_0005 = ifelse(sum(sig_0005) > 6,
                              "significant (< 0.005)",
                              "not significant")) %>%
  ggplot(aes(y = reorder(term, estimate), x = estimate, colour = sig_0005)) +
  geom_point() +
  geom_errorbarh(aes(xmax = conf.high, xmin = conf.low))

## imdb_post_code.r
library(tidyverse)
library(here)
library(broom)
library(corrr)
library(forcats)
library(stringr)
library(lubridate)
library(gridExtra)

# Load the movie metadata; the path is relative to the working directory.
df <- read_csv(file = "movie_metadata.csv")

## data_def_lrml.R
library(dplyr)
library(ggplot2)

# Settings for the simulated data.
n <- 200      # number of observations
bias <- 4     # presumably the true intercept -- TODO confirm where y is built
slope <- 3.5  # presumably the true slope -- TODO confirm where y is built
dot <- `%*%`  # defined for personal preference

# Predictor: n standard-normal draws, doubled.
x <- 2 * rnorm(n)
# Design matrix: x in the first column, a column of ones in the second.
x_b <- cbind(x, rep(1, times = n))

## learning_loop_lrml.R
# Gradient descent on the design matrix x_b. Each pass moves theta against
# the gradient of the mean squared error, then records the resulting sum of
# squared errors and the two coefficients.
for (iteration in seq_len(n_iterations)) {
  yhat        <- dot(x_b, theta)                   # predict using weights in theta
  residuals_b <- yhat - y                          # calculate the residuals
  gradients   <- 2 / n * dot(t(x_b), residuals_b)  # gradients of MSE w.r.t. model weights
  theta       <- theta - learning_rate * gradients # update theta

  # Track the fit after the update. theta[1] multiplies the first column of
  # x_b and theta[2] the second.
  sse_i[[iteration]] <- sum((y - dot(x_b, theta))^2)
  b0[[iteration]]    <- theta[2]
  b1[[iteration]]    <- theta[1]
}


## learning_plots_lrml.R
# Scatter plot of y against x, overlaid with one line per row of model_i
# (intercept b0, slope b1) coloured by negative SSE.
# NOTE(review): the `frame` aesthetic is presumably consumed by an animation
# package -- confirm which one before relying on it.
p1 <- df %>%
  ggplot(aes(x = x, y = y)) +
  geom_abline(aes(intercept = b0,
                  slope = b1,
                  colour = -sse,
                  frame = model_iter),
              data = model_i,
              alpha = .50) +
  geom_point(alpha = 0.4)

## predict_example_lrml.R
#' Predict from the weights currently stored in `theta`
#'
#' Appends a column of ones to `x` and multiplies the resulting two-column
#' matrix by `theta` using `dot`. Both `theta` and `dot` are looked up
#' outside the function.
#'
#' @param x A numeric vector of predictor values.
#' @return The result of `dot(cbind(x, 1s), theta)`.
predict_from_theta <- function(x) {
  ones <- rep(1, length(x))
  design <- cbind(x, ones)
  dot(design, theta)
}

# Example call. The lines below are pasted console output (only the first
# row was captured), kept as comments so the file parses.
predict_from_theta(rnorm(10))
#>            [,1]
#>  [1,] -1.530065
	# Swap the release year for the film's age relative to 2017: add
	# `years_since_release`, then drop the original `title_year` column.
	num_df <- select(
	  mutate(num_df, years_since_release = 2017 - title_year),
	  -title_year
	)
	# Mean IMDb score per release year, drawn as a line with points, a linear
	# trend line, and error bars spanning the central 95% of scores in each year.
	df %>%
	  group_by(title_year) %>%
	  summarise(mean_rating = mean(imdb_score),
	            # Symmetric interval: 2.5% in each tail.
	            upper_rating = quantile(imdb_score, 0.975),
	            lower_rating = quantile(imdb_score, 0.025)) %>%
	  ggplot(aes(title_year, mean_rating)) +
	  geom_line(colour = "dodger blue") +
	  geom_point(alpha = 0.5) +
	  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
	  # ymin takes the lower bound and ymax the upper bound.
	  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating))
	# Pairwise correlations of the columns of num_df in long format: columns
	# 2 to 16 of the correlate() output are stacked into `variable` /
	# `correlation`, and each pair is flagged "high" when |correlation| > 0.5.
	correlations <- num_df %>%
	  corrr::correlate() %>%
	  gather(key = variable, value = correlation, 2:16) %>%
	  select(rowname, variable, correlation) %>%
	  mutate(
	    high_correlation = ifelse(abs(correlation) > 0.50, "high", "not so high")
	  )

	# Heat map of the pairwise correlations: one tile per (rowname, variable)
	# pair, filled by correlation and labelled with the value rounded to two
	# decimals. Label colour distinguishes "high" from "not so high" pairs.
	correlations %>%
	  ggplot(aes(reorder(rowname, correlation),
	             reorder(variable, correlation),
	             fill = correlation)) +
	  geom_tile(alpha = 0.6, colour = "black") +
	  geom_text(aes(label = round(correlation, 2), colour = high_correlation)) +
	  theme_minimal()
	# Standardise every column of num_df with scale(), draw bootstrap replicates
	# via bootstrap(10), and regress imdb_score on all other columns within each
	# replicate. tidy() yields one row per model term (with confidence
	# intervals); `sig_0005` is TRUE where the term's p-value is below 0.005.
	fit <- as.data.frame(map(num_df, scale)) %>%
	  bootstrap(10) %>%
	  do(tidy(lm(imdb_score ~ ., data = .), conf.int = TRUE)) %>%
	  mutate(sig_0005 = p.value < 0.005)

	# Coefficient rows of `fit` with the intercept removed, grouped by model
	# term. The pipeline ends here so that it does not pipe into the
	# library() call that follows it.
	fit %>%
	  filter(term != "(Intercept)") %>%
	  group_by(term)
	library(tidyverse)
	library(here)
	library(broom)
	library(corrr)
	library(forcats)
	library(stringr)
	library(lubridate)
	library(gridExtra)

	# Load the movie metadata; the path is relative to the working directory.
	df <- read_csv(file = "movie_metadata.csv")
	library(dplyr)
	library(ggplot2)

	# Settings for the simulated data.
	n <- 200      # number of observations
	bias <- 4     # presumably the true intercept -- TODO confirm where y is built
	slope <- 3.5  # presumably the true slope -- TODO confirm where y is built
	dot <- `%*%`  # defined for personal preference

	# Predictor: n standard-normal draws, doubled.
	x <- 2 * rnorm(n)
	# Design matrix: x in the first column, a column of ones in the second.
	x_b <- cbind(x, rep(1, times = n))
	# Gradient descent on the design matrix x_b. Each pass moves theta against
	# the gradient of the mean squared error, then records the resulting sum of
	# squared errors and the two coefficients.
	for (iteration in seq_len(n_iterations)) {
	  yhat        <- dot(x_b, theta)                   # predict using weights in theta
	  residuals_b <- yhat - y                          # calculate the residuals
	  gradients   <- 2 / n * dot(t(x_b), residuals_b)  # gradients of MSE w.r.t. model weights
	  theta       <- theta - learning_rate * gradients # update theta

	  # Track the fit after the update. theta[1] multiplies the first column of
	  # x_b and theta[2] the second.
	  sse_i[[iteration]] <- sum((y - dot(x_b, theta))^2)
	  b0[[iteration]]    <- theta[2]
	  b1[[iteration]]    <- theta[1]
	}
	# Scatter plot of y against x, overlaid with one line per row of model_i
	# (intercept b0, slope b1) coloured by negative SSE.
	# NOTE(review): the `frame` aesthetic is presumably consumed by an animation
	# package -- confirm which one before relying on it.
	p1 <- df %>%
	  ggplot(aes(x = x, y = y)) +
	  geom_abline(aes(intercept = b0,
	                  slope = b1,
	                  colour = -sse,
	                  frame = model_iter),
	              data = model_i,
	              alpha = .50) +
	  geom_point(alpha = 0.4)
	#' Predict from the weights currently stored in `theta`
	#'
	#' Appends a column of ones to `x` and multiplies the resulting two-column
	#' matrix by `theta` using `dot`. Both `theta` and `dot` are looked up
	#' outside the function.
	#'
	#' @param x A numeric vector of predictor values.
	#' @return The result of `dot(cbind(x, 1s), theta)`.
	predict_from_theta <- function(x) {
	  ones <- rep(1, length(x))
	  design <- cbind(x, ones)
	  dot(design, theta)
	}

	# Example call. The lines below are pasted console output (only the first
	# row was captured), kept as comments so the file parses.
	predict_from_theta(rnorm(10))
	#> [,1]
	#> [1,] -1.530065