Last active
November 7, 2019 12:38
-
-
Save Arf9999/6007c40beeca6a650eeeb69cfaf03679 to your computer and use it in GitHub Desktop.
Cleanup of South African 2019 crime statistics into a two tidy formats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library (tidyverse) | |
library(readxl) | |
library(httr) | |
library(janitor) | |
##Download crime stats from SAPS site | |
url1 <- | |
"https://www.saps.gov.za/services/2018-2019_crimestatistics.xlsx" #URL of the SAPS excel file | |
GET(url1, write_disk(temp <- | |
tempfile(fileext = ".xlsx"))) # save the xlsx file to disk temporarily | |
crime_stats2019 <- | |
read_excel(temp, sheet = "Station data") # the data is contained on this "Station data" sheet | |
crime_stats2019 <- | |
janitor::clean_names(crimestats_2019, case = "snake") #remove spaces and upper case from column titles. | |
###--------Define crime categories and sub categories ---------------- | |
contact_crimes <- c( | |
"Murder", | |
"Sexual Offences", | |
"Attempted murder", | |
"Assault with the intent to inflict grievous bodily harm", | |
"Common assault", | |
"Common robbery", | |
"Robbery with aggravating circumstances" | |
) | |
sexual_offence <- c("Rape", | |
"Sexual Assault", | |
"Attempted Sexual Offences", | |
"Contact Sexual Offences") | |
some_aggravated_robbery <- c( | |
"Carjacking", | |
"Robbery at residential premises", | |
"Robbery at non-residential premises", | |
"Robbery of cash in transit", | |
"Bank robbery", | |
"Truck hijacking" | |
) | |
contact_related_crimes <- c("Arson", | |
"Malicious damage to property") | |
property_related_crimes <- c( | |
"Burglary at non-residential premises", | |
"Burglary at residential premises", | |
"Theft of motor vehicle and motorcycle", | |
"Theft out of or from motor vehicle", | |
"Stock-theft" | |
) | |
other_serious_crimes <- c("All theft not mentioned elsewhere", | |
"Commercial crime", | |
"Shoplifting") | |
crime_detected_as_result_of_police_action <- | |
c( | |
"Illegal possession of firearms and ammunition", | |
"Drug-related crime", | |
"Driving under the influence of alcohol or drugs", | |
"Sexual Offences detected as a result of police action" | |
) | |
x17_Community_reported_serious_crimes <- | |
c( | |
contact_crimes, | |
contact_related_crimes, | |
property_related_crimes, | |
other_serious_crimes | |
) | |
#----------------------------------------------------------# | |
#Remove hard coded category aggregates from the data | |
crime <- crime_stats2019 %>% | |
filter(str_detect(crime_category, "Contact crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Sexual offenses", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Contact-related crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Property-related crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Other serious crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "17 Community Reported Serious Crimes", negate = TRUE)) %>% | |
filter(str_detect( | |
crime_category, | |
"Crime detected as a result of police action", | |
negate = TRUE | |
)) | |
## Make "TRIO crimes" entries consistent | |
crime$crime_category <- | |
str_replace(crime$crime_category, "Trio crimes", "TRIO crimes") | |
# categorise the data, add marker to ensure that is is clear that | |
# "some_aggravated_robbery_sub" and "TRIO crimes" should not be aggregated in totals. | |
crime <- crime %>% | |
mutate( | |
contact_crime = (crime_category %in% contact_crimes), | |
sexual_offenses = (crime_category %in% sexual_offence), | |
contact_relate_crime = (crime_category %in% contact_related_crimes), | |
prop_related_crime = (crime_category %in% property_related_crimes), | |
other_serious_crime = (crime_category %in% other_serious_crimes), | |
crime_detected_as_result_police_action = ( | |
crime_category %in% crime_detected_as_result_of_police_action | |
), | |
x17_community_reported_crime = (crime_category %in% x17_Community_reported_serious_crimes), | |
some_aggravated_robbery_sub = (crime_category %in% some_aggravated_robbery), | |
include_in_totals = ( | |
some_aggravated_robbery_sub == FALSE & | |
crime_category != "TRIO crimes" | |
) | |
) | |
##save a wide version as an .rds and a .csv | |
saveRDS(crime, paste0("crimestat_clean_2019_wide", Sys.Date(), ".rds")) | |
write_csv(crime, paste0("crimestat_clean_2019_wide", Sys.Date() , ".csv")) | |
##Pivot the data for a long version (allows analysis by year) | |
crime2 <- | |
gather( | |
crime, | |
key = "year", | |
value = "count_crimes_reported" , | |
c( | |
"x2008_2009", | |
"x2009_2010", | |
"x2010_2011", | |
"x2011_2012", | |
"x2012_2013", | |
"x2013_2014", | |
"x2014_2015", | |
"x2015_2016", | |
"x2016_2017", | |
"x2017_2018", | |
"x2018_2019" | |
) | |
) | |
##reorder the columns | |
crime2 <- crime2[, c(18, 1:8, 19, 9:17)] | |
## Save the wide version as .rds and .csv | |
saveRDS(crime2, | |
paste0("crimestat_clean_2019_long", Sys.Date(), ".rds")) | |
write_csv(crime2, | |
paste0("crimestat_clean_2019_long", Sys.Date() , ".csv")) | |
#-------------------------------------------------------# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment