# jamessdixon/TheCountedStateMap

## TheCountedStateMap
# Load the geocoded records. The path is relative, so it is resolved against
# the current working directory.
the.counted <- read.csv(file = "./Data/TheCountedWithGeo.csv")
summary(the.counted)

#http://stackoverflow.com/questions/13420700/r-ggplot2-ggmap-concentric-circles-as-points
library(ggplot2)
library(maps)

# Point map: state outlines (grey border, white fill) with one red point per
# record at its lon/lat.
# NOTE: the variable name `plot` masks base plot() as a value only; a call such
# as plot(x) still resolves to the function, because R skips non-function
# bindings when looking up a call.
all.states <- map_data("state")
plot <- ggplot() +
  geom_polygon(data = all.states,
               mapping = aes(x = long, y = lat, group = group),
               colour = "grey",
               fill = "white") +
  geom_point(data = the.counted,
             mapping = aes(x = lon, y = lat),
             colour = "#FF0040")
plot

head(the.counted, n = 4)

#sum up into States
# One row per distinct value of the.counted$state, with the number of records
# carrying that value.
the.counted.state <- setNames(
  data.frame(table(the.counted$state)),
  c("StateCode", "NumberKilled")
)
summary(the.counted.state)

#bring in population of each state
#http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv
# NOTE(review): both CSVs below are downloaded over plain HTTP every time the
# script runs; confirm the URLs still resolve before relying on this.
state.population <- read.csv("http://www.census.gov/popest/data/state/asrh/2014/files/SCPRC-EST2014-18+POP-RES.csv")
state.population

#state crosswalk
state.crosswalk <- read.csv("http://www.fonz.net/blog/wp-content/uploads/2008/04/states.csv")
state.crosswalk

# Keep only the state name and the 2014 population estimate. The columns are
# selected by name rather than by position so that a change in the remote
# file's column layout fails loudly ("undefined columns selected") instead of
# silently picking the wrong columns.
state.population.2 <- state.population[c("NAME", "POPESTIMATE2014")]

# Attach the crosswalk columns by matching the full state name
# (NAME in the population table, State in the crosswalk).
state.population.3 <- merge(x=state.population.2,
                            y=state.crosswalk,
                            by.x="NAME",
                            by.y="State")
#The Counted With Population
# Inner join on the state abbreviation: rows whose StateCode has no matching
# Abbreviation are dropped.
the.counted.state <- merge(x=the.counted.state,
                           y=state.population.3,
                           by.x="StateCode",
                           by.y="Abbreviation")

# Raw per-capita rate: NumberKilled divided by the 2014 population estimate.
the.counted.state.2 <- the.counted.state
the.counted.state.2$KilledRatio <- with(the.counted.state.2,
                                        NumberKilled / POPESTIMATE2014)

# Scaled rates. AdjustedPopulation is the population in units of 10,000, so
# KilledRatio is NumberKilled per 10,000 population and AdjKilledRatio
# (KilledRatio * 10) is NumberKilled per 100,000 population.
the.counted.state.3 <- the.counted.state
the.counted.state.3$AdjustedPopulation <-
  the.counted.state.3$POPESTIMATE2014 / 10000
the.counted.state.3$KilledRatio <- with(the.counted.state.3,
                                        NumberKilled / AdjustedPopulation)
the.counted.state.3$AdjKilledRatio <- 10 * the.counted.state.3$KilledRatio

# Rename NAME to StateName and lower-case its values; StateName is later used
# as the join key against the `region` column of the map data.
names(the.counted.state.3) <- sub("^NAME$", "StateName",
                                  names(the.counted.state.3))
the.counted.state.3$StateName <- tolower(the.counted.state.3$StateName)

# Attach the per-state figures to every map vertex. all.x = TRUE keeps map
# regions that have no row in the.counted.state.3; their added columns are NA.
choropleth <- merge(x = all.states,
                    y = the.counted.state.3,
                    by.x = "region",
                    by.y = "StateName",
                    all.x = TRUE,
                    sort = FALSE)
# merge() does not guarantee row order, so re-sort by the map's `order` column
# before drawing the polygons.
choropleth <- choropleth[order(choropleth$order), ]
summary(choropleth)

# Choropleth shaded by AdjKilledRatio. Written with ggplot() + geom_polygon()
# because qplot() is deprecated as of ggplot2 3.4.0; the mappings are the same
# ones qplot() was given.
ggplot(data = choropleth,
       mapping = aes(x = long, y = lat, group = group,
                     fill = AdjKilledRatio)) +
  geom_polygon()

# Two-column working copy: state name and rate per 100,000 population.
the.counted.state.4 <- data.frame(
  StateName = the.counted.state.3$StateName,
  AdjKilledRatio = the.counted.state.3$AdjKilledRatio
)

#http://stackoverflow.com/questions/5570293/r-adding-column-which-contains-bin-value-of-another-column
# Print and summarise the rates, then try ten equal-width bins over (0, 1].
the.counted.state.4$AdjKilledRatio
summary(the.counted.state.4$AdjKilledRatio)
the.counted.state.4$KilledBin <- cut(
  the.counted.state.4$AdjKilledRatio,
  breaks = seq(from = 0, to = 1, by = .1)
)
summary(the.counted.state.4$KilledBin)

#but I have 10 bins and I really only need 3
#less than .1, .1 to .2, greater than .2
#http://stackoverflow.com/questions/11963508/generate-bins-from-a-data-frame
# Overwrites the ten-bin column above. cut() intervals are right-closed by
# default, so the bins are (0, .1] = low, (.1, .2] = med, (.2, Inf] = high.
the.counted.state.4$KilledBin <- cut(
  the.counted.state.4$AdjKilledRatio,
  breaks = c(seq(from = 0, to = .2, by = .1), Inf),
  labels = c("low", "med", "high")
)

summary(the.counted.state.4$KilledBin)
# Attach the low/med/high bin to every map vertex. all.x = TRUE keeps map
# regions without a row in the.counted.state.4; their KilledBin is NA.
choropleth.2 <- merge(x = all.states,
                      y = the.counted.state.4,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE,
                      sort = FALSE)
# merge() does not guarantee row order, so re-sort by the map's `order` column
# before drawing the polygons.
choropleth.2 <- choropleth.2[order(choropleth.2$order), ]
summary(choropleth.2)

# Choropleth shaded by KilledBin. Written with ggplot() + geom_polygon()
# because qplot() is deprecated as of ggplot2 3.4.0; the mappings are the same
# ones qplot() was given.
ggplot(data = choropleth.2,
       mapping = aes(x = long, y = lat, group = group,
                     fill = KilledBin)) +
  geom_polygon()


#https://www.fbi.gov/about-us/cjis/ucr/leoka/2013/tables/table_1_leos_fk_region_geographic_division_and_state_2004-2013.xls
# Local CSV read relative to the working directory; print the total of its
# OfficersKilled column.
officers.killed <- read.csv(
  file = "./Data/table_1_leos_fk_region_geographic_division_and_state_2013.csv"
)
sum(officers.killed$OfficersKilled)

# Inner join with the population table on the full state name; the key column
# keeps the x-side name, StateName.
officers.killed.2 <- merge(x = officers.killed,
                           y = state.population.3,
                           by.x = "StateName",
                           by.y = "NAME")

# Same scaling as used for the.counted: population in units of 10,000, then
# * 10, so AdjKilledRatio is OfficersKilled per 100,000 population.
officers.killed.2$AdjustedPopulation <-
  officers.killed.2$POPESTIMATE2014 / 10000
officers.killed.2$KilledRatio <- with(officers.killed.2,
                                      OfficersKilled / AdjustedPopulation)
officers.killed.2$AdjKilledRatio <- 10 * officers.killed.2$KilledRatio
# Lower-case the join key for matching against the map's `region` column.
officers.killed.2$StateName <- tolower(officers.killed.2$StateName)

# Attach the officers-killed figures to every map vertex. all.x = TRUE keeps
# map regions without a row in officers.killed.2; their added columns are NA.
choropleth.3 <- merge(x = all.states,
                      y = officers.killed.2,
                      by.x = "region",
                      by.y = "StateName",
                      all.x = TRUE,
                      sort = FALSE)
# merge() does not guarantee row order, so re-sort by the map's `order` column
# before drawing the polygons.
choropleth.3 <- choropleth.3[order(choropleth.3$order), ]
summary(choropleth.3)

# Choropleth shaded by AdjKilledRatio. Written with ggplot() + geom_polygon()
# because qplot() is deprecated as of ggplot2 3.4.0; the mappings are the same
# ones qplot() was given.
ggplot(data = choropleth.3,
       mapping = aes(x = long, y = lat, group = group,
                     fill = AdjKilledRatio)) +
  geom_polygon()

# Inner join of the two per-state tables on StateName. Both sides carry an
# AdjKilledRatio column, so merge() suffixes them: .x comes from
# the.counted.state.4 and .y from officers.killed.2.
the.counted.state.5 <- merge(x = the.counted.state.4,
                             y = officers.killed.2,
                             by = "StateName")

# Give the two suffixed rate columns descriptive names.
names(the.counted.state.5) <- sub("^AdjKilledRatio\\.x$", "NonPoliceKillRatio",
                                  names(the.counted.state.5))
names(the.counted.state.5) <- sub("^AdjKilledRatio\\.y$", "PoliceKillRatio",
                                  names(the.counted.state.5))

# Both rates plus their natural logs, for plotting and regression.
the.counted.state.6 <- data.frame(
  NonPoliceKilledRatio = the.counted.state.5$NonPoliceKillRatio,
  PoliceKilledRatio = the.counted.state.5$PoliceKillRatio,
  LoggedNonPoliceKilledRatio = log(the.counted.state.5$NonPoliceKillRatio),
  LoggedPoliceKilledRatio = log(the.counted.state.5$PoliceKillRatio)
)

# plot() on a data frame draws a scatterplot matrix of all four columns.
plot(the.counted.state.6)

# Pull out the two logged rates as plain vectors.
non.police <- the.counted.state.6[["LoggedNonPoliceKilledRatio"]]
police <- the.counted.state.6[["LoggedPoliceKilledRatio"]]
# log(0) is -Inf; turn those entries into NA so lm() drops the rows instead of
# failing on a non-finite value.
police[is.infinite(police) & police < 0] <- NA

# Simple linear regression in each direction on the logged rates.
model <- lm(formula = non.police ~ police)
summary(model)

model.2 <- lm(formula = police ~ non.police)
summary(model.2)
	# The analysis is complete at this point. A second, statement-for-statement
	# copy of the whole script that used to follow here has been removed: it
	# re-downloaded both remote CSVs, re-read both local files and redrew every
	# plot, only to recompute exactly the same objects.