Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Created November 13, 2019 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattSandy/e0d1ba4cbe6ec3676e1cc8648a72f485 to your computer and use it in GitHub Desktop.
Save MattSandy/e0d1ba4cbe6ec3676e1cc8648a72f485 to your computer and use it in GitHub Desktop.
Compares Party with Violent Crime Stats in Top 50 US Cities
library(tidyverse)
library(rvest)
library(magrittr)
# Functions ---------------------------------------------------------------
# Render a data frame as the lines of a pipe-delimited Markdown table.
#
# Args:
#   df: A data frame (or tibble). Its column names become the header row.
#
# Returns:
#   A character vector: the header line (names joined by " | "), a separator
#   line of "---" cells joined by "|", then one line per row of `df` with the
#   cell values joined by "|". A zero-row `df` yields only the header and
#   separator lines.
dfmd <- function(df) {
  header <- paste(names(df), collapse = " | ")
  separator <- paste(rep("---", length(names(df))), collapse = "|")
  # seq_len() (rather than 1:nrow(df)) keeps a zero-row input from iterating
  # over c(1, 0) and emitting bogus rows; vapply() builds every row line in
  # one pass instead of growing the result one append at a time.
  row_lines <- vapply(
    seq_len(nrow(df)),
    function(i) paste(df[i, ], collapse = "|"),
    character(1)
  )
  c(header, separator, row_lines)
}
# Violent Crime -----------------------------------------------------------
# Scrape the first <table> on Wikipedia's city crime-rate page.
vc <- "https://en.wikipedia.org/wiki/List_of_United_States_cities_by_crime_rate" %>%
  read_html() %>%
  html_nodes("table") %>%
  html_table(fill = TRUE) %>%
  .[[1]]
# The first data row is treated as a second header row: every column whose
# name differs from its first-row value is renamed "<name>_<first-row value>",
# and that row is then dropped. which() discards NA comparisons, so such
# columns keep their original names.
sub_header <- vc[1, ]
needs_suffix <- which(names(vc) != sub_header)
names(vc)[needs_suffix] <- paste0(
  names(vc)[needs_suffix],
  "_",
  sub_header[needs_suffix]
)
vc <- vc[-1, ]
# City Fixes: map the crime table's spellings (some carry a trailing digit,
# presumably a footnote marker -- TODO confirm) onto the names this script
# later matches against the mayors table.
city_fixes <- c(
  "Arlington4" = "Arlington",
  "New York" = "New York City",
  "Louisville Metro6" = "Louisville",
  "Nashville Metropolitan" = "Nashville",
  "Charlotte-Mecklenburg" = "Charlotte"
)
# %in% never yields NA, so missing city names are simply left untouched.
to_fix <- vc$City %in% names(city_fixes)
vc$City[to_fix] <- unname(city_fixes[vc$City[to_fix]])
vc$State[vc$State %in% "North Carolina8"] <- "North Carolina"
# Top 50 cities -----------------------------------------------------------
# Collect every <table> node on Wikipedia's list of mayors of the 50 largest
# US cities.
tbls <- "https://en.wikipedia.org/wiki/List_of_mayors_of_the_50_largest_cities_in_the_United_States" %>%
  read_html() %>%
  html_nodes("table")
# Keep only the tables whose markup mentions "democrat" (case-insensitive);
# the first of these is parsed into the mayors data frame. The bare
# expression below prints the matched nodes when run at top level.
party_tbls <- tbls[grep("democrat", tbls, ignore.case = TRUE)]
party_tbls
df <- html_table(party_tbls, fill = TRUE)[[1]]
names(df)[which(names(df) == "Population(July 1, 2017 est.)")] <- "Population"
# Strip everything except digits before converting the population to numeric.
df$Population <- as.numeric(str_replace_all(df$Population, "[^0-9]", ""))
# For each mayor row, take the first crime-table row with the same State and
# City. Iterating over the two key columns avoids apply(), which would coerce
# the whole data frame to a character matrix first. Rows with no match get NA.
df$Violent_Crime <- map2_dbl(df$State, df$City, function(state, city) {
  hit <- which(vc$State == state & vc$City == city)[1]
  # Drop thousands separators so a value such as "1,234.5" parses instead of
  # silently becoming NA.
  as.numeric(gsub(",", "", vc$`Violent crime_Total`[hit], fixed = TRUE))
})
# Proportion of rows belonging to each party.
prop.table(table(df$Party))
# Cities with NA Violent Crime --------------------------------------------
# Print, as a Markdown table, the party / city / state of every row that has
# no violent-crime value.
df[is.na(df$Violent_Crime), c("Party", "City", "State")] %>%
  dfmd() %>%
  cat(sep = "\n")
# Violent Crime Per Capita ------------------------------------------------
# Per party: total Population and total Violent_Crime over the rows that have
# a violent-crime value, plus their ratio, printed as a Markdown table.
# NOTE(review): Violent_Crime is read from the crime table's
# "Violent crime_Total" column. If that column holds per-100,000 rates rather
# than incident counts, summing it and dividing by population is not a
# meaningful per-capita figure -- confirm against the source table.
df %>%
  filter(!is.na(Violent_Crime)) %>%
  group_by(Party) %>%
  summarise(
    Population = sum(Population),
    Violent_Crime = sum(Violent_Crime)
  ) %>%
  # Output column label corrected from the misspelled "..._Per_Captia".
  mutate(Violent_Crime_Per_Capita = Violent_Crime / Population) %>%
  dfmd() %>%
  cat(sep = "\n")
# Percent of Mayors in Each Party -----------------------------------------
# Count the mayors in each party, express each count as a percentage of all
# mayors (rounded to two decimals), and print the result as a Markdown table.
df %>%
  count(Party, name = "Mayors") %>%
  mutate(Percent = round(Mayors / sum(Mayors) * 100, 2)) %>%
  dfmd() %>%
  cat(sep = "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment