Skip to content

Instantly share code, notes, and snippets.

@jamessdixon
Last active August 29, 2015 14:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamessdixon/6022289accc872423af6 to your computer and use it in GitHub Desktop.
Save jamessdixon/6022289accc872423af6 to your computer and use it in GitHub Desktop.
The Counted State Map
# Load the incident records and draw each one as a point over US state
# outlines.
# Point-overlay approach adapted from:
# http://stackoverflow.com/questions/13420700/r-ggplot2-ggmap-concentric-circles-as-points
library(ggplot2)
library(maps)

# One row per incident; the map below reads its `lon` and `lat` columns.
the.counted <- read.csv("./Data/TheCountedWithGeo.csv")
summary(the.counted)

# State outlines as a data frame of polygon vertices (long, lat, group, ...).
all.states <- map_data("state")

# Stored as `incident.map` rather than `plot` so the object does not mask
# graphics::plot(), which this script calls further down.
incident.map <- ggplot() +
  geom_polygon(data = all.states,
               aes(x = long, y = lat, group = group),
               colour = "grey", fill = "white") +
  geom_point(data = the.counted,
             aes(x = lon, y = lat),
             colour = "#FF0040")
incident.map
head(the.counted, 4)
# Tally incidents per state: one row per distinct value of `state`, with the
# number of records carrying that value.
the.counted.state <- as.data.frame(
  table(StateCode = the.counted$state),
  responseName = "NumberKilled"
)
summary(the.counted.state)
# Attach a population estimate to each state's tally.
#
# Population source:
# http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv
state.population <- read.csv("http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv")
state.population

# Crosswalk between full state names (`State`) and codes (`Abbreviation`).
state.crosswalk <- read.csv("http://www.fonz.net/blog/wp-content/uploads/2008/04/states.csv")
state.crosswalk

# Pick the two needed columns by name rather than by position (5 and 6), and
# fail immediately if the downloaded file no longer provides them; the merges
# and ratio calculations below depend on exactly these names.
population.columns <- c("NAME", "POPESTIMATE2014")
stopifnot(all(population.columns %in% names(state.population)))
state.population.2 <- state.population[population.columns]

# Add the state code to each population row (inner join on the state name).
state.population.3 <- merge(x = state.population.2,
                            y = state.crosswalk,
                            by.x = "NAME",
                            by.y = "State")

# The Counted with population: inner join on the state code, so tallies whose
# code has no match in the crosswalk are dropped.
the.counted.state <- merge(x = the.counted.state,
                           y = state.population.3,
                           by.x = "StateCode",
                           by.y = "Abbreviation")
# Raw ratio: killed divided by the unscaled population estimate.
the.counted.state.2 <- the.counted.state
the.counted.state.2[["KilledRatio"]] <-
  with(the.counted.state.2, NumberKilled / POPESTIMATE2014)

# Scaled ratios: population in units of 10,000, then the per-10,000 ratio
# multiplied by 10.
the.counted.state.3 <- the.counted.state
the.counted.state.3[["AdjustedPopulation"]] <-
  with(the.counted.state.3, POPESTIMATE2014 / 10000)
the.counted.state.3[["KilledRatio"]] <-
  with(the.counted.state.3, NumberKilled / AdjustedPopulation)
the.counted.state.3[["AdjKilledRatio"]] <-
  with(the.counted.state.3, KilledRatio * 10)

# Rename NAME to StateName and lower-case it so it can be matched against the
# `region` column of the map data.
names(the.counted.state.3) <- sub("^NAME$", "StateName",
                                  names(the.counted.state.3))
the.counted.state.3[["StateName"]] <- tolower(the.counted.state.3[["StateName"]])
# Join the per-state rates onto the state polygon vertices. all.x = TRUE keeps
# every vertex, so states without a matching rate row get NA for the rate.
choropleth <- merge(x = all.states,
                    y = the.counted.state.3,
                    sort = FALSE,
                    by.x = "region",
                    by.y = "StateName",
                    all.x = TRUE)
# merge() does not keep the original row order; restore the vertex sequence
# stored in `order` so each polygon is traced correctly.
choropleth <- choropleth[order(choropleth$order), ]
summary(choropleth)

# Shade each state by AdjKilledRatio. Uses ggplot() + geom_polygon(), matching
# the point map earlier in this script, instead of the deprecated qplot().
ggplot(choropleth,
       aes(x = long, y = lat, group = group, fill = AdjKilledRatio)) +
  geom_polygon()
# Keep only the state name and its adjusted ratio for binning.
the.counted.state.4 <- data.frame(
  StateName = the.counted.state.3$StateName,
  AdjKilledRatio = the.counted.state.3$AdjKilledRatio
)

# Binning approach from:
# http://stackoverflow.com/questions/5570293/r-adding-column-which-contains-bin-value-of-another-column
the.counted.state.4[["AdjKilledRatio"]]
summary(the.counted.state.4[["AdjKilledRatio"]])

# First pass: ten bins of width 0.1 between 0 and 1.
the.counted.state.4[["KilledBin"]] <- cut(
  the.counted.state.4[["AdjKilledRatio"]],
  breaks = seq(0, 1, .1)
)
summary(the.counted.state.4[["KilledBin"]])

# Ten bins is more than needed; three are enough:
#   low  = (0, .1], med = (.1, .2], high = above .2
# http://stackoverflow.com/questions/11963508/generate-bins-from-a-data-frame
the.counted.state.4[["KilledBin"]] <- cut(
  the.counted.state.4[["AdjKilledRatio"]],
  breaks = c(0, 0.1, 0.2, Inf),
  labels = c("low", "med", "high")
)
summary(the.counted.state.4[["KilledBin"]])
# Join the binned rates onto the state polygon vertices. all.x = TRUE keeps
# every vertex, so states without a matching row get NA for the bin.
choropleth.2 <- merge(x = all.states,
                      y = the.counted.state.4,
                      sort = FALSE,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE)
# merge() does not keep the original row order; restore the vertex sequence
# stored in `order` so each polygon is traced correctly.
choropleth.2 <- choropleth.2[order(choropleth.2$order), ]
summary(choropleth.2)

# Shade each state by its low/med/high bin. Uses ggplot() + geom_polygon(),
# matching the point map earlier in this script, instead of the deprecated
# qplot().
ggplot(choropleth.2,
       aes(x = long, y = lat, group = group, fill = KilledBin)) +
  geom_polygon()
# Officers killed per state, read from a local CSV. Source table:
# https://www.fbi.gov/about-us/cjis/ucr/leoka/2013/tables/table_1_leos_fk_region_geographic_division_and_state_2004-2013.xls
officers.killed <- read.csv("./Data/table_1_leos_fk_region_geographic_division_and_state_2013.csv")
sum(officers.killed[["OfficersKilled"]])

# Attach the population estimate (inner join on the full state name).
officers.killed.2 <- merge(x = officers.killed,
                           y = state.population.3,
                           by.x = "StateName",
                           by.y = "NAME")

# Same scaling as for the.counted.state.3: population in units of 10,000,
# then the per-10,000 ratio multiplied by 10.
officers.killed.2[["AdjustedPopulation"]] <-
  with(officers.killed.2, POPESTIMATE2014 / 10000)
officers.killed.2[["KilledRatio"]] <-
  with(officers.killed.2, OfficersKilled / AdjustedPopulation)
officers.killed.2[["AdjKilledRatio"]] <-
  with(officers.killed.2, KilledRatio * 10)

# Lower-case the name so it can be matched against the map's `region` column.
officers.killed.2[["StateName"]] <- tolower(officers.killed.2[["StateName"]])
# Join the officers-killed rates onto the state polygon vertices. all.x = TRUE
# keeps every vertex, so states without a matching row get NA for the rate.
choropleth.3 <- merge(x = all.states,
                      y = officers.killed.2,
                      sort = FALSE,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE)
# merge() does not keep the original row order; restore the vertex sequence
# stored in `order` so each polygon is traced correctly.
choropleth.3 <- choropleth.3[order(choropleth.3$order), ]
summary(choropleth.3)

# Shade each state by AdjKilledRatio. Uses ggplot() + geom_polygon(), matching
# the point map earlier in this script, instead of the deprecated qplot().
ggplot(choropleth.3,
       aes(x = long, y = lat, group = group, fill = AdjKilledRatio)) +
  geom_polygon()
# Compare the two per-state rates side by side and regress one on the other.
# Inner join: only states present in both tables are kept.
the.counted.state.5 <- merge(x = the.counted.state.4,
                             y = officers.killed.2,
                             by.x = "StateName",
                             by.y = "StateName")

# Both inputs carry an AdjKilledRatio column, so merge() suffixes them:
# .x comes from the.counted.state.4, .y from officers.killed.2.
names(the.counted.state.5)[names(the.counted.state.5) == "AdjKilledRatio.x"] <- "NonPoliceKillRatio"
names(the.counted.state.5)[names(the.counted.state.5) == "AdjKilledRatio.y"] <- "PoliceKillRatio"

# Raw and log-transformed versions of both rates.
the.counted.state.6 <- data.frame(
  NonPoliceKilledRatio = the.counted.state.5$NonPoliceKillRatio,
  PoliceKilledRatio = the.counted.state.5$PoliceKillRatio,
  LoggedNonPoliceKilledRatio = log(the.counted.state.5$NonPoliceKillRatio),
  LoggedPoliceKilledRatio = log(the.counted.state.5$PoliceKillRatio)
)
# Scatterplot matrix of all four columns.
plot(the.counted.state.6)

non.police <- the.counted.state.6$LoggedNonPoliceKilledRatio
police <- the.counted.state.6$LoggedPoliceKilledRatio

# log(0) is -Inf, and lm() stops on non-finite values. Mark them as NA in
# BOTH vectors so lm()'s default NA handling drops those states instead of
# failing when either rate is zero.
non.police[is.infinite(non.police)] <- NA
police[is.infinite(police)] <- NA

# Fit the log-log relationship in both directions.
model <- lm(non.police ~ police)
summary(model)
model.2 <- lm(police ~ non.police)
summary(model.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment