Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Count the distinct beers each brewery produces, ignoring beers that have no
# ABV listing. (Comparing against the mean/median number of beers was also
# tried, but did not seem particularly useful.)
beers_per_brewery <- reviews_raw %>%
  filter(!is.na(beer_abv)) %>%
  # Collapse the reviews to one row per (brewery, beer) pair. distinct() states
  # this directly and avoids carrying an intermediate `n` column into the next
  # count(), whose handling of a pre-existing `n` has varied across dplyr
  # releases.
  distinct(brewery_name, beer_name) %>%
  # n = number of distinct beers per brewery, largest first.
  count(brewery_name, sort = TRUE) %>%
  # Proportion (0-1) of breweries that make exactly one beer. The same value is
  # repeated on every row so it can be read from the first row later on.
  mutate(produce_one = mean(n == 1))
# Long-format table (columns `key` and `value`) holding the mean and median
# number of beers per brewery, for easy plot labelling.
brewery_measures <- beers_per_brewery %>%
  summarise(
    Mean = mean(n),
    Median = median(n)
  ) %>%
  # pivot_longer() supersedes gather(). The output names are spelled out so the
  # result keeps the `key` / `value` columns that gather() produced by default.
  pivot_longer(
    cols = everything(),
    names_to = "key",
    values_to = "value"
  )
# Histogram of breweries by number of distinct beers (log10 x axis), with
# dashed vertical reference lines taken from brewery_measures.
ggplot(beers_per_brewery, aes(x = n)) +
  geom_histogram(fill = "#0072B2", color = "white", alpha = 0.8) +
  geom_vline(
    data = brewery_measures,
    mapping = aes(xintercept = value, color = key),
    linetype = 2,
    size = 0.9
  ) +
  scale_x_log10() +
  # Fixed y range of 0-1100 with no padding on either end.
  # NOTE(review): with ggplot2's default out-of-bounds handling, a bin taller
  # than 1100 would be dropped rather than clipped; confirm the cap still fits.
  scale_y_continuous(
    limits = c(0, 1100),
    expand = c(0, 0),
    labels = comma_format()
  ) +
  theme(legend.position = c(0.75, 0.87)) +
  labs(
    x = "# of Beers",
    y = "# of Breweries",
    title = "Number of Unique Beers Produced by Breweries",
    # Only the first produce_one value is used for the label.
    subtitle = paste0(
      percent(beers_per_brewery$produce_one[1]),
      " of breweries in the dataset only produce 1 beer"
    ),
    # Empty string blanks the title of the color legend.
    color = ""
  ) +
  my_theme_tweaks()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment