Skip to content

Instantly share code, notes, and snippets.

@expersso
Last active November 30, 2015 15:20
Show Gist options
  • Save expersso/586c8f2e30765c053596 to your computer and use it in GitHub Desktop.
Save expersso/586c8f2e30765c053596 to your computer and use it in GitHub Desktop.
Fatal dog attacks by category of dog
lapply(c("dplyr", "xml2", "rvest", "stringr", "ggplot2"), library,
character.only = TRUE)
# Source article: Wikipedia's list of fatal dog attacks in the United States
url <- "https://en.wikipedia.org/wiki/Fatal_dog_attacks_in_the_United_States"
page <- read_html(url)

# Collect every <table> node on the page and parse each one into a data frame.
# The article keeps one table per year.
tbls <- html_table(xml_find_all(page, "//table"))
# Recover the year that belongs to each table from the section subheadings
# ("Fatalities reported in <year>").
years <- local({
  heading_nodes <- xml_find_all(
    page,
    "//h3/span[contains(text(), 'Fatalities reported in ')]"
  )
  # Keep only the digits of each heading and convert them to a number
  heading_digits <- str_replace_all(xml_text(heading_nodes), "[^0-9]+", "")
  heading_years <- as.numeric(heading_digits)
  # No table for year 1985, so drop it to keep years aligned with tables
  heading_years[heading_years != 1985]
})
# Tag each yearly table with its year. Tables and years are paired purely by
# position, so this relies on `tbls` and `years` being in the same order.
for (i in seq_along(years)) {
  tbls[[i]]$year <- years[i]
}

# Stack all tables into one data frame. `bind_rows()` replaces `rbind_all()`,
# which was deprecated and then removed from dplyr; columns are matched by
# name and any column missing from a table is filled with NA.
df <- bind_rows(tbls)
# Clean the category labels: drop a space followed by parenthesised text
# (" (...)", matched greedily) and the optional-plural markers "(s)" / "(es)".
df$`Category of Dog` <- str_replace_all(
  df$`Category of Dog`,
  " \\(.*\\)|\\(e?s\\)",
  ""
)

# Consolidate categories to include mixes: any label that mentions one of
# these breeds (case-insensitively) is replaced by "<breed> (incl. mixes)".
# Because the label is overwritten, the first matching breed in this vector
# wins for labels that mention several breeds.
for (dog in c("Pit bull", "Rottweiler", "Bulldog", "Husky", "German Shepherd")) {
  df$`Category of Dog`[
    str_detect(df$`Category of Dog`, regex(dog, ignore_case = TRUE))
  ] <- paste0(dog, " (incl. mixes)") # paste0: paste() would add a second space
}
# Horizontal bar chart of the number of fatal attacks per dog category,
# restricted to categories with more than two recorded attacks and ordered
# by count.
df %>%
  group_by(`Category of Dog`) %>%
  summarise(n = n()) %>%
  filter(n > 2) %>%
  ggplot(aes(x = reorder(`Category of Dog`, n), y = n)) +
  geom_col() +
  coord_flip() +
  # Start the count axis exactly at zero with no padding around the bars
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA)) +
  theme_bw() +
  xlab(NULL) +
  ylab("\nNumber of attacks") +
  ggtitle("Fatal dog attacks by category of dog\n United States; 1887-2015")
@expersso
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment