Skip to content

Instantly share code, notes, and snippets.

View realtor-ut.R
# set working directory to the local project folder (home-relative path,
# so it only resolves on a machine that has ~/Projects/zillow)
setwd("~/Projects/zillow")
# import libraries (tidyverse is the only package attached in this preview)
library(tidyverse)
### REALTOR ###
# import realtor.com inventory data
# source: https://www.realtor.com/research/data/ (Inventory, Monthly)
View utah-zipcodes.csv
zipcode city county
84001 Altamont Duchesne
84002 Altonah Duchesne
84003 American Fork Utah
84004 Alpine Utah
84005 Eagle Mountain Utah
84006 Bingham Canyon Salt Lake
84007 Bluebell Duchesne
84008 Bonanza Uintah
84010 Bountiful Davis
View fec-campaign-finance-explorer.R
library(tidyverse)
library(rvest)
# define list of input
# every second year from 1996 through 2020, kept as character strings
years <- as.character(seq(1996, 2020, by = 2))
# two-digit form of each entry: characters 3-4 of the four-digit year
years_abbr <- substr(years, 3, 4)
# loop over years, combine data
# source: https://www.fec.gov/data/browse-data/?tab=bulk-data
# work from the local project folder (home-relative path; the loop itself is
# not shown in this preview)
setwd("~/Projects/fec")
View acs-B01001-mapping.csv
variable sex age
B01001_003 Male Under 5 Years
B01001_004 Male 5 to 9 Years
B01001_005 Male 10 to 14 Years
B01001_006 Male 15 to 17 Years
B01001_007 Male 18 and 19 Years
B01001_008 Male 20 Years
B01001_009 Male 21 Years
B01001_010 Male 22 to 24 Years
B01001_011 Male 25 to 29 Years
View imdb-character-importance-plot.R
# calculate character % present
character_importance_plot = all_characters %>%
mutate(show = factor(show, levels = c('The Office', 'Parks & Recreation', 'Modern Family', 'Community', 'New Girl', 'The Good Place'))) %>%
mutate(per = character_episodes / total_episodes) %>%
ggplot(., aes(x = reorder(character_name , per), y = per, col = show)) +
geom_point(size = 3) +
coord_flip() +
theme(legend.position = 'none') +
labs(title = 'Character Importance by Show', subtitle = 'IMDb Episode Descriptions, All Seasons',
y = 'Percent Present in Episode Descriptions', x = '', fill = 'Season', caption = 'Data Source: IDMb.com | Author: @erikgregorywebb') +
View imdb-character-importance.R
# define character count function
# For one show, counts how many rows of `df` belong to it and how many of
# those rows have a `description` matching the character's name.
#   df             data frame with `show` and `description` columns
#   show_name      value of `show` whose rows are kept
#   character_name pattern handed to str_detect() against `description`
# Returns a tibble with columns show, character_name, total_episodes and
# character_episodes.
get_character_counts <- function(df, show_name, character_name) {
  # rows for the requested show; reused for both counts below
  show_rows <- df %>% filter(show == show_name)
  # subset of those rows whose description matches the character pattern
  mention_rows <- show_rows %>% filter(str_detect(description, character_name))
  tibble(
    show = show_name,
    character_name = character_name,
    total_episodes = nrow(show_rows),
    character_episodes = nrow(mention_rows)
  )
}
View imdb-rating-trends-plot.R
View imdb-scrape-clean.R
# define scraper function
scrape_show = function(name, imdb_id, no_seasons) {
datalist = list()
n = 1
for (i in 1:no_seasons) {
Sys.sleep(3)
url = paste('https://www.imdb.com/title/', imdb_id, '/episodes?season=', i , sep = '')
page = read_html(url)
episodes = page %>% html_node('.list') %>% html_nodes('.info')
for (j in 1:length(episodes)) {
View imdb-define-shows.R
# define list of shows
# one row per show: display name, IMDb title id, number of seasons
shows <- tribble(
  ~name,                ~imdb_id,    ~no_seasons,
  'The Office',         'tt0386676', 9,
  'Parks & Recreation', 'tt1266020', 7,
  'Modern Family',      'tt1442437', 11,
  'Community',          'tt1439629', 6,
  'New Girl',           'tt1826940', 7,
  'The Good Place',     'tt4955642', 4
)
View imdb-packages.R
# import packages
library(tidyverse)
library(rvest)
library(lubridate)
library(scales)