# kelly-sovacool/bob_ross.R

## bob_ross.R
library(tidyverse)

# Load the FiveThirtyEight Bob Ross "elements by episode" table.
# EPISODE and TITLE are read as character; every other column is read as a
# double so the feature columns can be summed later. Naming the two text
# columns (instead of a positional "ccddd..." string) keeps the spec valid
# regardless of how many feature columns the file has.
ross_data <- read_csv(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
  col_types = cols(
    EPISODE = col_character(),
    TITLE = col_character(),
    .default = col_double()
  )
) %>%
  # Remove any literal double-quote characters from the titles
  mutate(TITLE = gsub("\"", "", TITLE, fixed = TRUE))

# Title keywords used as example themes in Part 3
themes <- c("MOUNTAIN", "WINTER", "AUTUMN", "LAKE", "CABIN")

#############################################################
#  Part 1 - fill in the blanks on each line of the function
#############################################################

# function sums the features (columns) by the descriptors in the title

#' Total each painting feature over the episodes whose title matches a theme
#'
#' @param descriptor A single string. It is handed to `grepl()` as a regular
#'   expression and matched anywhere inside `TITLE`.
#' @param data Episode table containing `EPISODE`, `TITLE` and numeric
#'   feature columns. Defaults to the script-level `ross_data`, so existing
#'   one-argument calls keep working.
#' @return A tibble with one row per feature and the columns `feature`,
#'   `abundance` (sum of that feature over the matching episodes) and `theme`
#'   (the descriptor), sorted by decreasing `abundance`.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    filter(grepl(descriptor, TITLE)) %>%
    select(-EPISODE, -TITLE) %>%
    pivot_longer(
      everything(),
      names_to = "feature",
      values_to = "presence"
    ) %>%
    group_by(feature) %>%
    summarize(abundance = sum(presence)) %>%
    mutate(theme = descriptor) %>%
    arrange(desc(abundance))
}

#############################################################
#  Part 2 - comment the function
#############################################################

# Sum every feature column over the episodes whose title matches `descriptor`.
#   descriptor: single string, used by grepl() as a regular expression
#   data:       episode table with EPISODE, TITLE and numeric feature columns;
#               defaults to the script-level ross_data
# Returns a tibble (feature, abundance, theme) sorted by decreasing abundance.
get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
  data %>%
    # keep only the episodes whose title matches the descriptor
    filter(grepl(descriptor, TITLE)) %>%
    # drop the two text columns so that only feature columns remain
    select(-EPISODE, -TITLE) %>%
    # reshape to long form: one row per episode/feature combination
    pivot_longer(
      everything(),
      names_to = "feature",
      values_to = "presence"
    ) %>%
    # add up each feature across the matching episodes
    group_by(feature) %>%
    summarise(abundance = sum(presence)) %>%
    # record which descriptor produced these totals
    mutate(theme = descriptor) %>%
    # largest totals first
    arrange(desc(abundance))
}

#############################################################
#  Part 3 - Run this function on each of the themes and output a single dataframe
#############################################################

# Using a for loop


# Using an apply
reduce(lapply(themes, get_feature_abundance_by_theme), rbind)

# Using a map function: map_df() row-binds the per-theme tables itself
themes %>%
  map_df(get_feature_abundance_by_theme)


#############################################################
#  Part 4 - Run this function on all themes
#############################################################

# you can use the following code to pick the unique themes
# Split each title into up to five pieces (word1..word5), stack the pieces
# into one `word` column, and keep each distinct word once. De-duplication
# has to happen on the word vector itself: whole rows still carry EPISODE and
# `place`, so calling unique() on the data frame removes nothing and every
# repeated word would be processed again below.
all_themes <- ross_data %>%
  separate(TITLE, c('word1', 'word2', 'word3', 'word4', 'word5')) %>%
  gather(place, word, contains('word')) %>%
  filter(
    !is.na(word),
    # an empty piece would match every title in grepl()
    nzchar(word),
    !(word %in% c('A', 'THE', 'IN'))
  ) %>%
  pull(word) %>%
  unique()

# one pass per distinct word; this can still take a little while
map_df(all_themes, get_feature_abundance_by_theme)
	library(tidyverse)

	# Load the FiveThirtyEight Bob Ross "elements by episode" table.
	# EPISODE and TITLE are read as character; all remaining columns default to
	# double so they can be summed. A named spec replaces the positional
	# "ccddd..." string, which had to match the column count exactly.
	ross_data <- read_csv(
	  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv",
	  col_types = cols(
	    EPISODE = col_character(),
	    TITLE = col_character(),
	    .default = col_double()
	  )
	) %>%
	  # Remove any literal double-quote characters from the titles
	  mutate(TITLE = gsub("\"", "", TITLE, fixed = TRUE))

	# Title keywords used as example themes in Part 3
	themes <- c("MOUNTAIN", "WINTER", "AUTUMN", "LAKE", "CABIN")

	#############################################################
	# Part 1 - fill in the blanks on each line of the function
	#############################################################

	# function sums the features (columns) by the descriptors in the title

	#' Total each painting feature over the episodes whose title matches a theme
	#'
	#' @param descriptor A single string. It is handed to `grepl()` as a regular
	#'   expression and matched anywhere inside `TITLE`.
	#' @param data Episode table containing `EPISODE`, `TITLE` and numeric
	#'   feature columns. Defaults to the script-level `ross_data`, so existing
	#'   one-argument calls keep working.
	#' @return A tibble with the columns `feature`, `abundance` (sum of that
	#'   feature over the matching episodes) and `theme` (the descriptor),
	#'   sorted by decreasing `abundance`.
	get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
	  data %>%
	    filter(grepl(descriptor, TITLE)) %>%
	    select(-EPISODE, -TITLE) %>%
	    pivot_longer(
	      everything(),
	      names_to = "feature",
	      values_to = "presence"
	    ) %>%
	    group_by(feature) %>%
	    summarize(abundance = sum(presence)) %>%
	    mutate(theme = descriptor) %>%
	    arrange(desc(abundance))
	}

	#############################################################
	# Part 2 - comment the function
	#############################################################

	# Sum every feature column over the episodes whose title matches `descriptor`.
	#   descriptor: single string, used by grepl() as a regular expression
	#   data:       episode table with EPISODE, TITLE and numeric feature
	#               columns; defaults to the script-level ross_data
	# Returns a tibble (feature, abundance, theme) sorted by decreasing abundance.
	get_feature_abundance_by_theme <- function(descriptor, data = ross_data) {
	  data %>%
	    # keep only the episodes whose title matches the descriptor
	    filter(grepl(descriptor, TITLE)) %>%
	    # drop the two text columns so that only feature columns remain
	    select(-EPISODE, -TITLE) %>%
	    # reshape to long form: one row per episode/feature combination
	    pivot_longer(
	      everything(),
	      names_to = "feature",
	      values_to = "presence"
	    ) %>%
	    # add up each feature across the matching episodes
	    group_by(feature) %>%
	    summarise(abundance = sum(presence)) %>%
	    # record which descriptor produced these totals
	    mutate(theme = descriptor) %>%
	    # largest totals first
	    arrange(desc(abundance))
	}

	#############################################################
	# Part 3 - Run this function on each of the themes and output a single dataframe
	#############################################################

	# Using a for loop


	# Using an apply
	themes %>%
	  lapply(get_feature_abundance_by_theme) %>%
	  reduce(rbind)

	# Using a map function: one call maps over the themes and row-binds the results
	map_df(.x = themes, .f = get_feature_abundance_by_theme)


	#############################################################
	# Part 4 - Run this function on all themes
	#############################################################

	# you can use the following code to pick the unique themes
	# Split each title into up to five pieces (word1..word5), stack the pieces
	# into one `word` column, and keep each distinct word once. unique() must
	# run on the word vector: whole rows still carry EPISODE and `place`, so
	# de-duplicating the data frame removes nothing and repeated words would
	# each be processed again below.
	all_themes <- ross_data %>%
	  separate(TITLE, c('word1', 'word2', 'word3', 'word4', 'word5')) %>%
	  gather(place, word, contains('word')) %>%
	  filter(
	    !is.na(word),
	    # an empty piece would match every title in grepl()
	    nzchar(word),
	    !(word %in% c('A', 'THE', 'IN'))
	  ) %>%
	  pull(word) %>%
	  unique()

	# one pass per distinct word; this can still take a little while
	map_df(all_themes, get_feature_abundance_by_theme)