Skip to content

Instantly share code, notes, and snippets.

View fec-campaign-finance-explorer.R
library(tidyverse)
library(rvest)
# Input years as strings: every second year from 1996 through 2020,
# plus the last two digits of each year (e.g. '1996' -> '96', '2000' -> '00').
years <- as.character(seq(1996, 2020, by = 2))
years_abbr <- substr(years, 3, 4)
# loop over years, combine data
# source: https://www.fec.gov/data/browse-data/?tab=bulk-data
# NOTE: setwd() changes the working directory for the rest of the R session,
# so any relative path used after this line resolves under ~/Projects/fec.
setwd("~/Projects/fec")
View acs-B01001-mapping.csv
variable sex age
B01001_003 Male Under 5 Years
B01001_004 Male 5 to 9 Years
B01001_005 Male 10 to 14 Years
B01001_006 Male 15 to 17 Years
B01001_007 Male 18 and 19 Years
B01001_008 Male 20 Years
B01001_009 Male 21 Years
B01001_010 Male 22 to 24 Years
B01001_011 Male 25 to 29 Years
View imdb-character-importance-plot.R
# calculate character % present
# Builds a dot plot from `all_characters`: one point per character, positioned at
# per = character_episodes / total_episodes, ordered by that value and coloured
# by show. The expression continues past the last `+` below.
character_importance_plot = all_characters %>%
  # turn `show` into a factor with an explicit level order
  mutate(show = factor(show, levels = c('The Office', 'Parks & Recreation', 'Modern Family', 'Community', 'New Girl', 'The Good Place'))) %>%
  # share of episodes in which the character is counted
  mutate(per = character_episodes / total_episodes) %>%
  ggplot(., aes(x = reorder(character_name , per), y = per, col = show)) +
  geom_point(size = 3) +
  # flip axes so character names run along the vertical axis
  coord_flip() +
  theme(legend.position = 'none') +
  # NOTE(review): `fill = 'Season'` has no matching fill aesthetic in the visible
  # part of this plot (colour is mapped via `col`) -- confirm whether it is needed.
  labs(title = 'Character Importance by Show', subtitle = 'IMDb Episode Descriptions, All Seasons',
       y = 'Percent Present in Episode Descriptions', x = '', fill = 'Season', caption = 'Data Source: IMDb.com | Author: @erikgregorywebb') +
View imdb-character-importance.R
# Count how often a character shows up in one show's episode descriptions.
#
# df             data frame with (at least) `show` and `description` columns
# show_name      value of `show` to restrict to
# character_name pattern passed to str_detect() against `description`
#
# Returns a one-row tibble: show, character_name, total_episodes (rows for the
# show) and character_episodes (rows whose description matches the pattern).
get_character_counts <- function(df, show_name, character_name) {
  # rows belonging to the requested show
  show_rows <- filter(df, show == show_name)
  # of those, rows whose description matches the character pattern
  matching_rows <- filter(show_rows, str_detect(description, character_name))
  tibble(
    show = show_name,
    character_name = character_name,
    total_episodes = nrow(show_rows),
    character_episodes = nrow(matching_rows)
  )
}
View imdb-rating-trends-plot.R
View imdb-scrape-clean.R
# define scraper function
# Fetches one IMDb episode-list page per season for the title `imdb_id`
# (seasons 1..no_seasons) and walks the episode nodes found on each page.
# `name` is not used in the visible part of the body, which is truncated here.
scrape_show = function(name, imdb_id, no_seasons) {
# accumulator list and running index; how they are filled is not visible here
datalist = list()
n = 1
for (i in 1:no_seasons) {
# pause 3 seconds before each season request
Sys.sleep(3)
# season page URL: https://www.imdb.com/title/<imdb_id>/episodes?season=<i>
url = paste('https://www.imdb.com/title/', imdb_id, '/episodes?season=', i , sep = '')
page = read_html(url)
# first '.list' node on the page, then every '.info' node inside it
episodes = page %>% html_node('.list') %>% html_nodes('.info')
# NOTE(review): if `episodes` is empty, 1:length(episodes) is c(1, 0) and the
# loop body still runs twice; seq_along(episodes) would skip it -- confirm intent.
for (j in 1:length(episodes)) {
View imdb-define-shows.R
# Shows to process, one row per show: display name, IMDb title id,
# and the number of seasons.
shows <- tribble(
  ~name,                ~imdb_id,    ~no_seasons,
  'The Office',         'tt0386676', 9,
  'Parks & Recreation', 'tt1266020', 7,
  'Modern Family',      'tt1442437', 11,
  'Community',          'tt1439629', 6,
  'New Girl',           'tt1826940', 7,
  'The Good Place',     'tt4955642', 4
)
View imdb-packages.R
# import packages
library(tidyverse)
library(rvest)
library(lubridate)
library(scales)
View imdb-all.R
# import packages
library(tidyverse)
library(rvest)
library(lubridate)
library(scales)
# define list of shows
shows = tibble(
name = c('The Office', 'Parks & Recreation', 'Modern Family', 'Community', 'New Girl', 'The Good Place'),
imdb_id = c('tt0386676', 'tt1266020', 'tt1442437', 'tt1439629', 'tt1826940', 'tt4955642'),
View mouse-backup.R
# import
library(rJava)
library(rMouse)
Sys.setenv(JAVA_HOME='C:/Program Files/Java/jre1.8.0_241') # for 64-bit version
# loop
# Repeats for as long as `condition` is FALSE; each pass starts by sleeping
# 30 seconds. The rest of the loop body is not visible in this excerpt.
condition = FALSE
while(condition == FALSE) {
Sys.sleep(30)