Skip to content

Instantly share code, notes, and snippets.

@kelly-sovacool
Created March 31, 2020 16:59
Show Gist options
  • Save kelly-sovacool/ecf79bb9aa4a35fc7fd94cd5e2e37896 to your computer and use it in GitHub Desktop.
Save kelly-sovacool/ecf79bb9aa4a35fc7fd94cd5e2e37896 to your computer and use it in GitHub Desktop.
Joy of Coding: Nick's code club 2020-03-30 w/ Will & Katie
library(tidyverse)
# Load the FiveThirtyEight Bob Ross "elements by episode" table.
# EPISODE and TITLE are read as character; every remaining column is parsed
# as double so the feature columns can be summed later on.
# NOTE(review): this named spec replaces a positional "cc" + "d..." string and
# assumes EPISODE and TITLE are the two leading columns of the CSV - confirm.
ross_data <- read_csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
  col_types = cols(
    EPISODE = col_character(),
    TITLE = col_character(),
    .default = col_double()
  )
) %>%
  # Remove the literal double-quote characters embedded in the titles.
  mutate(TITLE = gsub("\\\"", '', TITLE))
# Title keywords used as example themes in the exercises below.
themes <- c(
  "MOUNTAIN",
  "WINTER",
  "AUTUMN",
  "LAKE",
  "CABIN"
)
#############################################################
# Part 1 - fill in the blanks on each line of the function
#############################################################
# function sums the features (columns) by the descriptors in the title
#' Sum every feature column over the episodes whose title matches a descriptor
#'
#' @param descriptor Pattern handed to `grepl()` and matched against `TITLE`;
#'   it is therefore interpreted as a regular expression.
#' @param data Data frame holding `EPISODE`, `TITLE` and the feature columns
#'   to be summed. Defaults to the script-level `ross_data`, so existing
#'   one-argument calls behave as before.
#' @return A tibble with one row per feature and the columns `feature`,
#'   `abundance` (sum of that feature over the matching episodes) and `theme`
#'   (the descriptor), sorted by decreasing `abundance`.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    filter(grepl(descriptor, TITLE)) %>%
    select(-EPISODE, -TITLE) %>%
    # pivot_longer() is the maintained replacement for the superseded gather()
    pivot_longer(
      everything(),
      names_to = "feature",
      values_to = "presence"
    ) %>%
    group_by(feature) %>%
    summarise(abundance = sum(presence)) %>%
    mutate(theme = descriptor) %>%
    arrange(desc(abundance))
}
#############################################################
# Part 2 - comment the function
#############################################################
# Sums every feature column over the episodes whose TITLE matches `descriptor`.
#
# Arguments:
#   descriptor - pattern passed to grepl() (treated as a regular expression)
#   data       - data frame with EPISODE, TITLE and the feature columns;
#                defaults to the script-level `ross_data`, so one-argument
#                calls keep working unchanged
# Returns: a tibble with columns `feature`, `abundance` and `theme`, one row
#   per feature, ordered from most to least abundant.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    # keep only the episodes whose title matches the descriptor
    filter(grepl(descriptor, TITLE)) %>%
    # drop the identifier columns so only feature columns remain
    select(-EPISODE, -TITLE) %>%
    # reshape to long form: one row per (episode, feature) pair;
    # pivot_longer() replaces the superseded gather()
    pivot_longer(
      everything(),
      names_to = "feature",
      values_to = "presence"
    ) %>%
    # total each feature across the matching episodes
    group_by(feature) %>%
    summarise(abundance = sum(presence)) %>%
    # record which descriptor produced these totals
    mutate(theme = descriptor) %>%
    # most abundant features first
    arrange(desc(abundance))
}
#############################################################
# Part 3 - Run this function on each of the themes and output a single dataframe
#############################################################
# Using a for loop
# Using an apply
# Build one tibble per theme, then stack them in a single pass.
# bind_rows() avoids the repeated copying of a pairwise reduce(rbind) and
# also copes with an empty list of results.
themes %>%
  lapply(get_feature_abundance_by_theme) %>%
  bind_rows()
# Using a map function
# Map over the themes and row-bind the per-theme results into one data frame.
themes %>%
  map_df(get_feature_abundance_by_theme)
#############################################################
# Part 4 - Run this function on all themes
#############################################################
# you can use the following code to pick the unique themes
# Collect the distinct words used in episode titles (minus a few stop words).
all_themes <- ross_data %>%
  # Split each title into at most five words. Shorter titles are padded with
  # NA on the right and any words past the fifth are dropped - the same result
  # as the defaults, but stated explicitly so no warnings are raised.
  separate(
    TITLE,
    c("word1", "word2", "word3", "word4", "word5"),
    extra = "drop",
    fill = "right"
  ) %>%
  pivot_longer(
    starts_with("word"),
    names_to = "place",
    values_to = "word"
  ) %>%
  # Order by word position first (all word1, then word2, ...) so the themes
  # come out in the same order the earlier gather()-based version produced.
  arrange(place) %>%
  filter(!is.na(word)) %>%
  filter(!(word %in% c("A", "THE", "IN"))) %>%
  # De-duplicate on the word itself. Calling unique() on the whole data frame
  # compared entire rows (EPISODE, features, ...), so repeated words survived.
  distinct(word) %>%
  pull(word)
# this takes a little while, 1-2 mins
# Run the per-theme summary for every word in all_themes and row-bind the
# results into one data frame.
all_themes %>%
  map_df(get_feature_abundance_by_theme)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment