# Load dplyr
library(dplyr)
# Load ggplot2
library(ggplot2)
# Read the movies table from a CSV file in the user's home directory
movies <- read.csv(file = "~/movies.csv")

# Show the first rows as a quick sanity check of what was read
head(movies)

# First rows whose `genres` value is exactly "Drama"
# (rows where "Drama" is only part of the value are not matched by `==`)
head(filter(movies, genres == "Drama"))

# First rows whose `title` contains "2014" and whose `genres` contains "Drama"
# (presumably the release year is embedded in the title -- confirm with the data)
head(filter(movies, grepl("2014", title) & grepl("Drama", genres)))
# Turn the above code into reusable functions

# Keep the rows of `movies` whose `title` matches `y`.
#
# movies: data frame with a `title` column.
# y:      pattern searched for in `title`; it is handed to grepl() as a
#         regular expression, so a number such as 2015 is coerced to "2015".
#         Presumably the title embeds the release year -- confirm with the data.
#
# Returns the filtered data frame.
byYear <- function(movies, y) {
  dplyr::filter(movies, grepl(y, title))
}
# Keep the rows of `movies` that match both a year and a genre.
#
# movies: data frame with `title` and `genres` columns.
# g:      pattern searched for in `genres` (regular expression, via grepl()).
# y:      pattern searched for in `title`, forwarded to byYear().
#
# Note the argument order: genre first, then year.
# Returns the filtered data frame.
byGenreAndYear <- function(movies, g, y) {
  matching_year <- byYear(movies, y)
  dplyr::filter(matching_year, grepl(g, genres))
}
# Count the number of movies.
#
# movies: data frame (one row per movie).
#
# Returns the result of summarise() with a single count column named `n`.
movieCount <- function(movies) {
  dplyr::summarise(movies, n = dplyr::n())
}
# Combine: count the movies matching year 2014 and genre "Drama".
# byGenreAndYear() takes the genre before the year, so the arguments are
# passed by name to make a silent swap impossible.
movies %>%
  byGenreAndYear(g = "Drama", y = 2014) %>%
  movieCount()
# First 100 movies whose title matches 2015
m <- movies %>%
  byYear(2015) %>%
  head(100)

# Not readable: bar chart of genre counts with the default orientation.
# Columns are referenced by bare name inside aes() so ggplot2 looks them up
# in the `data` argument (and labels the axis "genres").
ggplot(m) +
  geom_bar(aes(x = genres))

# Much better: same chart with the axes flipped
ggplot(m, aes(x = genres)) +
  geom_bar() +
  coord_flip()

# Bring down the number of items by keeping only "Horror" movies matching 2015
n <- movies %>% byGenreAndYear("Horror", 2015)
ggplot(n, aes(x = genres)) +
  geom_bar() +
  coord_flip()