Skip to content

Instantly share code, notes, and snippets.

@StaffanBetner
Last active November 6, 2017 14:39
Show Gist options
  • Save StaffanBetner/258e083b924b4959d289aa241fe3e83a to your computer and use it in GitHub Desktop.
Save StaffanBetner/258e083b924b4959d289aa241fe3e83a to your computer and use it in GitHub Desktop.
IMDb TV Show Rating/Votes Plot
install.packages(c("tidyverse", "rvest", "fANCOVA"))
library(tidyverse)
library(rvest)
library(fANCOVA)
#' Plot per-episode IMDb ratings or vote counts for a TV show
#'
#' Downloads the "/epdate" page of an IMDb title, turns its first HTML table
#' into one row per episode, and draws a scatter plot with one smoothed line
#' per season plus one smoothed line across all episodes.
#'
#' @param id A single IMDb title id string, e.g. "tt1266020".
#' @param votes If FALSE (the default) the y axis shows `UserRating` on a
#'   fixed 1-10 scale; if TRUE it shows `UserVotes` on a scale starting at 0.
#' @return A ggplot object, so further layers such as `ggtitle()` can be
#'   added with `+`.
imdb_tvshow_ratings_plot <- function(id, votes = FALSE) {
  # Validate before any network access so bad arguments fail fast and clearly.
  if (!is.character(id) || length(id) != 1L || is.na(id) || !nzchar(id)) {
    stop(
      "`id` must be a single non-empty IMDb title id, e.g. \"tt1266020\".",
      call. = FALSE
    )
  }
  show_votes <- if (length(votes) == 1L) as.logical(votes) else NA
  if (is.na(show_votes)) {
    stop("`votes` must be a single TRUE or FALSE value.", call. = FALSE)
  }

  episodes <- .imdb_episode_table(id)
  if (nrow(episodes) == 0L) {
    stop(
      "No episodes with both a rating and a vote count were found for id \"",
      id, "\".",
      call. = FALSE
    )
  }

  # The two plot variants differ only in the y column, its scale and its label.
  if (show_votes) {
    y_values <- episodes$UserVotes
    mapping <- aes(
      x = `Episode Number`,
      y = UserVotes,
      color = Season,
      group = Season
    )
    y_scale <- scale_y_continuous(limits = c(0, NA))
    y_label <- "Number of votes per episode"
  } else {
    y_values <- episodes$UserRating
    mapping <- aes(
      x = `Episode Number`,
      y = UserRating,
      color = Season,
      group = Season
    )
    y_scale <- scale_y_continuous(limits = c(1, 10), breaks = 1:10)
    y_label <- "IMDB Rating"
  }

  span <- .imdb_loess_span(episodes$`Episode Number`, y_values)

  ggplot(episodes, mapping) +
    y_scale +
    # One smoothed line per season (grouping inherited from `mapping`).
    stat_smooth(geom = "line", se = FALSE, alpha = 0.8) +
    geom_point(alpha = 0.5) +
    labs(y = y_label) +
    # A single smoothed line over all episodes: the constant group overrides
    # the per-season grouping, and the span is the data-driven one from above.
    stat_smooth(
      geom = "line",
      mapping = aes(group = 0),
      alpha = 0.4,
      span = span
    )
}

# Private helper: fetch the "/epdate" page for `id` and return one row per
# episode with columns Season (factor), Episode, UserRating, Title, UserVotes
# and `Episode Number` (1..n after sorting by season and episode).
.imdb_episode_table <- function(id) {
  page <- read_html(paste0("http://www.imdb.com/title/", id, "/epdate"))

  # NOTE(review): the page is read with two different decimal marks. This
  # appears to be so that dotted values (`#`, UserRating) stay text in the
  # first table and comma-grouped vote counts stay text in the second, which
  # lets them be split/parsed explicitly below -- confirm against rvest's
  # type conversion. The fifth column of the table is not used and is dropped.
  dotted_as_text <- html_table(page, dec = ",", trim = TRUE)[[1]][, -5]
  votes_as_text <- html_table(page, dec = ".", trim = TRUE)[[1]][["UserVotes"]]

  episodes <- dotted_as_text %>%
    # The table's own `Episode` column holds the title; free the name so that
    # splitting `#` can produce Season and Episode.
    mutate(Title = Episode) %>%
    select(-Episode) %>%
    separate(col = `#`, into = c("Season", "Episode")) %>%
    transmute(
      # factor() on the parsed numbers gives one level per distinct season in
      # numeric order, which is what arrange() and the colour legend rely on.
      Season = factor(parse_number(as.character(Season))),
      # as.character() guards against columns that html_table() has already
      # converted to numbers, which parse_number() would reject.
      Episode = parse_number(as.character(Episode)),
      UserRating = parse_number(as.character(UserRating)),
      Title,
      UserVotes = parse_number(as.character(votes_as_text))
    ) %>%
    # Rows with any missing or unparseable field are dropped before numbering,
    # so the span selection never sees missing values.
    na.omit() %>%
    arrange(Season, Episode)

  # Running episode index across all seasons, used as the x axis.
  episodes$`Episode Number` <- as.numeric(seq_len(nrow(episodes)))
  episodes
}

# Private helper: choose a loess span for y ~ x as the geometric mean of the
# spans selected by fANCOVA::loess.as() under the "gcv" and "aicc" criteria.
.imdb_loess_span <- function(x, y) {
  spans <- vapply(
    c("gcv", "aicc"),
    function(criterion) {
      fANCOVA::loess.as(x = x, y = y, criterion = criterion)$pars$span
    },
    numeric(1)
  )
  exp(mean(log(spans)))
}
## Example: rating plot for the title with IMDb id "tt1266020".
## Pass votes = TRUE to plot the per-episode vote counts instead.
imdb_tvshow_ratings_plot(id = "tt1266020", votes = FALSE) +
  ggtitle("Parks and Recreation")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment