Last active
August 1, 2018 15:38
-
-
Save diegovalle/f6275ac1aa83e0a7d9e3 to your computer and use it in GitHub Desktop.
changes and trends in homicide rates in the most violent metro areas or big municipios or at the national level
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Changes in homicide rates" | |
output: html_document | |
--- | |
I've updated the [mxmortalitydb](https://github.com/diegovalle/mxmortalitydb) package to include 2013 data. This data only packages includes all injury intent deaths (accidents, homicides, suicides and unspcified intent) that were registered in Mexico from 2004 to 2013. You can use the package to calculate changes and trends in homicide rates in the most violent metro areas or big municipios. | |
```{r} | |
#if (!require('devtools')) install.packages('devtools') | |
#devtools::install_github('diegovalle/mxmortalitydb') | |
library(mxmortalitydb) | |
library(stringr) | |
library(dplyr) | |
library(ggplot2) | |
library(grid) ## needed for arrow | |
``` | |
```{r} | |
plotMetro <- function(metro.name, metro.areas) { | |
## Plot the homicide counts in a metro area or municipio | |
## metro.name - name of the metro area to plot | |
## metro.areas - data frame containing a list of metro areas in the same format as the metro.area dataframe from mxmortalitydb | |
## data.frame metro.areas contains the 2010 CONAPO metro areas | |
df <- merge(injury.intent, metro.areas, by.x = c("state_reg", "mun_reg"), | |
by.y = c("state_code", "mun_code")) | |
## Yearly homicides in Mexico City, by state of registration | |
# df2 <- ddply(subset(df, metro_area == metro.name & intent.imputed == "Homicide"), .(year_reg), | |
# summarise, count = length(state_reg)) | |
ggplot(df2, aes(year_reg, count)) + geom_line() + | |
labs(title = str_c("Homicides (plus deaths of unknown intent classified as homicide) in\n", metro.name)) + | |
ylim(0, max(df2$count)) + | |
ylab("homicide count") + | |
xlab("year of registration") + | |
theme_bw() | |
} | |
plotChanges <- function(df, metro.areas, country.rate, years) { | |
## Plot of rates and trends | |
## df - injury.intent dataframe | |
## metro.areas - data frame containing a list of metro areas in the same format as the metro.area dataframe from mxmortalitydb | |
## country.rate - rate to show as a gray dotted line | |
## years - start and end year to compare changes | |
# Where to place the country average annotation | |
yName = "Celaya" | |
## When the municipio where the death occurred is unknown use | |
## the municipio where it was registered as place of occurrance | |
df[df$mun_occur_death == 999, ]$state_occur_death <- | |
df[df$mun_occur_death == 999, ]$state_reg | |
df[df$mun_occur_death==999, ]$mun_occur_death <- | |
df[df$mun_occur_death==999, ]$mun_reg | |
## Counts of homicide by state, municipio and year | |
df <- df %>% | |
group_by(state_occur_death, mun_occur_death, year_reg) %>% | |
filter( year_reg %in% years, intent.imputed == "Homicide", state_occur_death %in% 1:32) %>% | |
summarise(count = n()) | |
## Merge the counts with our fake metro areas | |
## Since I don't have pop data previous to 2010 use the 2010 data | |
if(years[1] < 2010){ | |
temp <- subset( metro.areas, year == 2010) | |
temp$year <- years[1] | |
metro.areas <- rbind(temp, metro.areas) | |
} | |
df <- merge(df, metro.areas, by.x = c("state_occur_death", "mun_occur_death", "year_reg"), | |
by.y = c("state_code", "mun_code", "year")) | |
## Now get the counts by metro area (which may contain more than one municipio) | |
df <- df %>% | |
group_by(metro_area, year_reg) %>% | |
summarise(count = sum(count), | |
population = sum(pop), | |
rate = count / population * 10^5) | |
## We are only including the metro area if at some time it had a homicide rate of at least 15 | |
df <- subset(df, metro_area %in% subset(df, rate > 15)$metro_area) | |
## Make sure the dataframe is ordered by metro and year | |
df <- df[order(df$metro_area, df$year_reg),] | |
## Data frame for the arrow structure | |
arrows <- df %>% | |
group_by(metro_area) %>% | |
summarise(start = rate[1], | |
end = rate[2], | |
change = ifelse(start >= end, "decrease" ,"increase")) | |
## Order the chart by the homicide rate in 2013 | |
df$metro_area <- reorder(as.factor(df$metro_area), df$rate, function(x) x[[2]]) | |
#df$metro_area <- factor(df$metro_area, levels=levels(df$metro_area), ordered = TRUE) | |
ggplot(df, | |
aes(rate, metro_area, | |
group = as.factor(year_reg), | |
color = as.factor(year_reg))) + | |
geom_point(aes(size = log(count))) + | |
labs(title = "Homicide rates and trends\n(includes deaths of unknown intent classified as homicide)") + | |
scale_size("number\nof\nhomicides", breaks = c(log(50),log(500),log(3000)), | |
labels = c(50, 500, 3000)) + | |
geom_segment(data = arrows, aes(x= start , y = metro_area, | |
xend = end, yend = metro_area, | |
group = change, color = change), | |
arrow=arrow(length=unit(0.3,"cm")), alpha = .8) + | |
scale_color_manual("year\nand\ntrend", | |
values = c("gray", "black", "blue", "red")) + | |
ylab("metro area or municipio") + | |
xlab("homicide rate") + | |
#scale_x_log10()+ | |
geom_vline(xintercept = country.rate, linetype = 2, color = "#666666") + | |
annotate("text", y = yName, | |
x = 26, label = "country\naverage\n2013", | |
hjust = 0, size = 4, color = "#666666") + | |
theme_bw() | |
} | |
``` | |
```{r, cache=TRUE} | |
# Download population data from the CONAPO | |
pop <- read.csv(url("http://www.conapo.gob.mx/work/models/CONAPO/Proyecciones/Datos/Bases_de_Datos/Proyecciones_Municipios/CSV/baseprymunMX.csv"), | |
encoding="latin1") | |
pop <- pop %>% | |
group_by(ent, id_ent, mun, id_mun, año) %>% | |
summarise(pop = sum(pob)) | |
names(pop) <- c("ent", "state_code", "mun", "mun_code", "year", "pop") | |
## Let's treat the big municipalities which are not part of a metro area | |
## as if they were one | |
## rename big.municipios to merge with metro.areas | |
big.municipios2 <- big.municipios | |
names(big.municipios2) <- c("state_code", "mun_code", | |
"mun_population_2010", "metro_area") | |
big.municipios2$metro_area <- NA | |
metro.areas.fake <- rbind(metro.areas[,c("state_code", "mun_code", "metro_area")], | |
big.municipios2[,c("state_code", "mun_code", "metro_area")]) | |
pop <- left_join(metro.areas.fake, pop) | |
pop$metro_area <- ifelse(is.na(pop$metro_area), str_c(pop$ent, ", ", pop$mun), pop$metro_area) | |
``` | |
Homicide rate | |
```{r fig.width=9, fig.height=6} | |
## Chart of homicide rates in all of Mexico 2004-2013 | |
imp <- injury.intent %>% | |
group_by(year_reg) %>% | |
filter(intent.imputed == "Homicide", state_occur_death %in% 1:32) %>% | |
summarise(count = n()) | |
imp$pop <- c(105951569, 107151011, 108408827, 109787388, 111299015, 112852594, | |
114255555, 115682868, 117053750, 118395054) | |
imp$rate <- imp$count /imp$pop * 10^5 | |
ggplot(imp, aes(year_reg, rate)) + | |
geom_line() + | |
ggtitle("Homicide rate in Mexico 2004-2013 (includes deaths of unknown intent classified as homicides)") + | |
ylim(0, 28) + | |
xlab("year") + | |
theme_bw() | |
# Charts of deaths of unknown intent by mechanism | |
uintent <- injury.intent %>% | |
group_by(year_reg, intent, mechanism) %>% | |
filter(mechanism %in% c("Cut/pierce", "Firearm", NA), | |
!intent %in% "Homicide", state_occur_death %in% 1:32 ) %>% | |
summarise(count = n()) | |
uintent$intent <- as.character(uintent$intent) | |
uintent$intent[is.na(uintent$intent)] <- "Unspecified" | |
ggplot(uintent, aes(year_reg, count, group = intent, color = intent)) + | |
geom_line() + | |
ggtitle("Injury intent deaths by selected mechanism (excluding homicides)") + | |
xlab("year of registration") + | |
facet_wrap(~mechanism, scales="free_y", nrow = 2) + | |
theme_bw() | |
``` | |
And changes from 2012 to 2013 and 2006 to 2012: | |
```{r fig.width=8, fig.height=10.6} | |
plotChanges(injury.intent, pop, | |
country.rate = 25047/118395054*10^5, | |
years = c(2012,2013)) | |
ggsave("change2012-2013.svg", dpi = 100, width = 8, height = 10.60) | |
plotChanges(injury.intent, pop, | |
country.rate = 25047/118395054*10^5, | |
years = c(2006,2013)) | |
ggsave("change2006-2013.svg", dpi = 100, width = 8, height = 10.60) | |
``` | |
Note how violent Apatzingán, Michoacán has become. | |
Do note that the charts of homicides from 2006 were made with the 2010 population according to the CONAPO, so the homicide rate is underestimated by a little bit. Also rather than using the raw homicide numbers I adjusted them by classifying deaths of unknown intent as described in the [mxmortalitydb package](https://github.com/diegovalle/mxmortalitydb) and this [post](http://blog.diegovalle.net/2012/12/mexicos-drug-war-63000-extra-deaths-in.html). | |
```{r fig.width=7, fig.height=6} | |
# ll <- list("Acapulco", "Iguala de la Independencia", "Zihuatanejo de Azueta", | |
# "Nuevo Laredo", "La Laguna", | |
# "Chihuahua", "Tecomán", "Juárez", "Culiacán", | |
# "Victoria", "Hidalgo del Parral", | |
# "El Mante", | |
# "Ciudad Valles", | |
# "Durango", "Cuernavaca", "Zacatecas-Guadalupe", | |
# "Monterrey", | |
# "Piedras Negras", "Mazatlán", "Veracruz", | |
# "Tijuana", "Guadalajara", | |
# "Tepic", "Coatzacoalcos") | |
# names(ll) <- ll # make lapply print the names of the metro areas | |
# lapply(ll, plotMetro, metro.areas.fake) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment