# The Counted State Map
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Load the per-incident records. The point layer below plots each row using
# its `lon` / `lat` columns.
the.counted <- read.csv("./Data/TheCountedWithGeo.csv")
summary(the.counted)

# Point-on-map approach adapted from:
# http://stackoverflow.com/questions/13420700/r-ggplot2-ggmap-concentric-circles-as-points
library(ggplot2)
library(maps)

# State outline polygons; the `region` and `order` columns are relied on by
# the choropleth merges later in the script.
all.states <- map_data("state")

# NOTE: this object is named `plot`, the same name as the base graphics
# function. Calls such as plot(x) still resolve to the function because R
# skips non-function objects when looking up a call, but avoid reusing the
# name for anything callable.
plot <- ggplot() +
  geom_polygon(data = all.states,
               aes(x = long, y = lat, group = group),
               colour = "grey", fill = "white") +
  geom_point(data = the.counted,
             aes(x = lon, y = lat),
             colour = "#FF0040")
plot

head(the.counted, 4)
# Sum up into states: one row per distinct value of the.counted$state with
# the number of incident rows that carry it.
the.counted.state <- as.data.frame(table(the.counted$state))
colnames(the.counted.state) <- c("StateCode", "NumberKilled")
summary(the.counted.state)
# Bring in the population of each state.
# http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv
state.population <- read.csv("http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv")
state.population

# State crosswalk: supplies the `State` / `Abbreviation` columns used to link
# full state names to the codes in the incident data.
state.crosswalk <- read.csv("http://www.fonz.net/blog/wp-content/uploads/2008/04/states.csv")
state.crosswalk

# Keep only the two columns the rest of the script uses. They are selected by
# name rather than by position (previously columns 5 and 6) so a change in
# the file's column layout fails loudly instead of silently picking the
# wrong data.
state.population.2 <- state.population[c("NAME", "POPESTIMATE2014")]

# merge() defaults to an inner join, so population rows whose NAME has no
# matching `State` in the crosswalk are dropped.
state.population.3 <- merge(x = state.population.2,
                            y = state.crosswalk,
                            by.x = "NAME",
                            by.y = "State")
# The Counted with population.
# Attach population to the per-state counts through the crosswalk's
# `Abbreviation` column. merge() defaults to an inner join, so state codes
# without a match are dropped.
# NOTE: this overwrites the.counted.state, so re-running only this section
# in an interactive session merges an already-merged frame.
the.counted.state <- merge(x = the.counted.state,
                           y = state.population.3,
                           by.x = "StateCode",
                           by.y = "Abbreviation")

# Raw per-person ratio (not referenced again in the visible script).
the.counted.state.2 <- the.counted.state
the.counted.state.2$KilledRatio <-
  the.counted.state.2$NumberKilled / the.counted.state.2$POPESTIMATE2014

# Scaled rates, computed from this frame's own population column:
#   KilledRatio    = NumberKilled per 10,000 residents
#   AdjKilledRatio = KilledRatio * 10, i.e. NumberKilled per 100,000 residents
the.counted.state.3 <- the.counted.state
the.counted.state.3$AdjustedPopulation <-
  the.counted.state.3$POPESTIMATE2014 / 10000
the.counted.state.3$KilledRatio <-
  the.counted.state.3$NumberKilled / the.counted.state.3$AdjustedPopulation
the.counted.state.3$AdjKilledRatio <- the.counted.state.3$KilledRatio * 10

# The map data's `region` values are matched against lower-cased state names
# in the merges below, so rename and lower-case here.
names(the.counted.state.3)[names(the.counted.state.3) == "NAME"] <- "StateName"
the.counted.state.3$StateName <- tolower(the.counted.state.3$StateName)
# Attach the per-state rates to the outline polygons. all.x = TRUE keeps the
# polygons of regions that have no matching row; their rate columns are NA.
choropleth <- merge(x = all.states,
                    y = the.counted.state.3,
                    sort = FALSE,
                    by.x = "region",
                    by.y = "StateName",
                    all.x = TRUE)

# merge() does not keep the original row order; polygons must be drawn in
# their `order` sequence or the outlines come out scrambled.
choropleth <- choropleth[order(choropleth$order), ]
summary(choropleth)

# Continuous choropleth. ggplot() + geom_polygon() replaces the deprecated
# qplot(..., geom = "polygon") shorthand.
ggplot(choropleth,
       aes(x = long, y = lat, group = group, fill = AdjKilledRatio)) +
  geom_polygon()
# Slim frame holding just the state name and its scaled rate.
the.counted.state.4 <- data.frame(StateName = the.counted.state.3$StateName,
                                  AdjKilledRatio = the.counted.state.3$AdjKilledRatio)

# Binning approach from:
# http://stackoverflow.com/questions/5570293/r-adding-column-which-contains-bin-value-of-another-column
the.counted.state.4$AdjKilledRatio
summary(the.counted.state.4$AdjKilledRatio)

# First pass: ten equal-width bins over (0, 1]. Values outside that range
# become NA. Exploratory only; the column is overwritten below.
the.counted.state.4$KilledBin <- cut(the.counted.state.4$AdjKilledRatio,
                                     breaks = seq(0, 1, .1))
summary(the.counted.state.4$KilledBin)

# Ten bins is more than needed; three are enough:
# up to .1, .1 to .2, greater than .2.
# http://stackoverflow.com/questions/11963508/generate-bins-from-a-data-frame
# cut() intervals are right-closed by default, so the bins are
# (0, 0.1] = low, (0.1, 0.2] = med, (0.2, Inf] = high. A rate of exactly 0
# falls outside the first interval and would be NA.
the.counted.state.4$KilledBin <- cut(the.counted.state.4$AdjKilledRatio,
                                     breaks = c(0, 0.1, 0.2, Inf),
                                     labels = c("low", "med", "high"))
summary(the.counted.state.4$KilledBin)
# Attach the binned rates to the outline polygons. all.x = TRUE keeps the
# polygons of regions that have no matching row; their KilledBin is NA.
choropleth.2 <- merge(x = all.states,
                      y = the.counted.state.4,
                      sort = FALSE,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE)

# Restore polygon drawing order, which merge() does not preserve.
choropleth.2 <- choropleth.2[order(choropleth.2$order), ]
summary(choropleth.2)

# Categorical (low / med / high) choropleth. ggplot() + geom_polygon()
# replaces the deprecated qplot(..., geom = "polygon") shorthand.
ggplot(choropleth.2,
       aes(x = long, y = lat, group = group, fill = KilledBin)) +
  geom_polygon()
# Officers-killed table, stored locally as CSV. Source listed by the author:
# https://www.fbi.gov/about-us/cjis/ucr/leoka/2013/tables/table_1_leos_fk_region_geographic_division_and_state_2004-2013.xls
officers.killed <- read.csv("./Data/table_1_leos_fk_region_geographic_division_and_state_2013.csv")
sum(officers.killed$OfficersKilled)

# Attach population by full state name. merge() defaults to an inner join,
# so rows whose StateName has no matching NAME are dropped. The key column
# keeps the name "StateName" in the result.
# NOTE(review): the file name says 2013 while the population column is the
# 2014 estimate; confirm the year mismatch is acceptable.
officers.killed.2 <- merge(x = officers.killed,
                           y = state.population.3,
                           by.x = "StateName",
                           by.y = "NAME")

# Same scaling as the.counted.state.3:
#   KilledRatio    = OfficersKilled per 10,000 residents
#   AdjKilledRatio = KilledRatio * 10, i.e. per 100,000 residents
officers.killed.2$AdjustedPopulation <-
  officers.killed.2$POPESTIMATE2014 / 10000
officers.killed.2$KilledRatio <-
  officers.killed.2$OfficersKilled / officers.killed.2$AdjustedPopulation
officers.killed.2$AdjKilledRatio <- officers.killed.2$KilledRatio * 10

# Lower-case so the names match the map data's `region` values.
officers.killed.2$StateName <- tolower(officers.killed.2$StateName)
# Attach the officers-killed rates to the outline polygons. all.x = TRUE
# keeps the polygons of regions that have no matching row; their rate is NA.
choropleth.3 <- merge(x = all.states,
                      y = officers.killed.2,
                      sort = FALSE,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE)

# Restore polygon drawing order, which merge() does not preserve.
choropleth.3 <- choropleth.3[order(choropleth.3$order), ]
summary(choropleth.3)

# Continuous choropleth of the officers-killed rate. ggplot() +
# geom_polygon() replaces the deprecated qplot(..., geom = "polygon").
ggplot(choropleth.3,
       aes(x = long, y = lat, group = group, fill = AdjKilledRatio)) +
  geom_polygon()
# Put both per-state rates side by side. Both inputs carry an AdjKilledRatio
# column, so merge() suffixes them: .x comes from the.counted.state.4 and
# .y from officers.killed.2.
the.counted.state.5 <- merge(x = the.counted.state.4,
                             y = officers.killed.2,
                             by = "StateName")
names(the.counted.state.5)[names(the.counted.state.5) == "AdjKilledRatio.x"] <- "NonPoliceKillRatio"
names(the.counted.state.5)[names(the.counted.state.5) == "AdjKilledRatio.y"] <- "PoliceKillRatio"

# Raw and natural-log versions of both rates. log(0) is -Inf, which occurs
# for any state whose rate is zero.
the.counted.state.6 <- data.frame(
  NonPoliceKilledRatio = the.counted.state.5$NonPoliceKillRatio,
  PoliceKilledRatio = the.counted.state.5$PoliceKillRatio,
  LoggedNonPoliceKilledRatio = log(the.counted.state.5$NonPoliceKillRatio),
  LoggedPoliceKilledRatio = log(the.counted.state.5$PoliceKillRatio)
)

# Scatterplot matrix of all four columns (base graphics plot on a data frame).
plot(the.counted.state.6)

non.police <- the.counted.state.6$LoggedNonPoliceKilledRatio
police <- the.counted.state.6$LoggedPoliceKilledRatio

# lm() cannot fit on infinite values, so turn them into NA and let lm()'s
# default NA handling drop those states. is.infinite() is used instead of
# `== -Inf` because it is FALSE for NA, so the index never contains NA; it
# is applied to both series so a zero rate on either side is handled alike.
police[is.infinite(police)] <- NA
non.police[is.infinite(non.police)] <- NA

# Log-log regressions in both directions.
model <- lm(non.police ~ police)
summary(model)
model.2 <- lm(police ~ non.police)
summary(model.2)
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment