Last active
March 31, 2021 08:04
-
-
Save Arf9999/5fccfee4497fc320a51a62df3531e4aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library (tidyverse) | |
library(readxl) | |
library(httr) | |
library(janitor) | |
##Download crime stats from SAPS site | |
url1 <- | |
"https://www.saps.gov.za/services/Crime-Statistics%202019_2020.xlsx" #URL of the SAPS excel file | |
GET(url1, write_disk(temp <- | |
tempfile(fileext = ".xlsx"))) # save the xlsx file to disk temporarily | |
crime_stats2020 <- | |
read_excel(temp, sheet = "station data 2020") # the data is contained on this "Station data" sheet | |
crime_stats2020 <- | |
janitor::clean_names(crime_stats2020, case = "snake") #remove spaces and upper case from column titles. | |
###--------Define crime categories and sub categories ---------------- | |
contact_crimes <- c( | |
"Murder", | |
"Sexual Offences", | |
"Attempted murder", | |
"Assault with the intent to inflict grievous bodily harm", | |
"Common assault", | |
"Common robbery", | |
"Robbery with aggravating circumstances" | |
) | |
sexual_offence <- c("Rape", | |
"Sexual Assault", | |
"Attempted Sexual Offences", | |
"Contact Sexual Offences") | |
some_aggravated_robbery <- c( | |
"Carjacking", | |
"Robbery at residential premises", | |
"Robbery at non-residential premises", | |
"Robbery of cash in transit", | |
"Bank robbery", | |
"Truck hijacking" | |
) | |
contact_related_crimes <- c("Arson", | |
"Malicious damage to property") | |
property_related_crimes <- c( | |
"Burglary at non-residential premises", | |
"Burglary at residential premises", | |
"Theft of motor vehicle and motorcycle", | |
"Theft out of or from motor vehicle", | |
"Stock-theft" | |
) | |
other_serious_crimes <- c("All theft not mentioned elsewhere", | |
"Commercial crime", | |
"Shoplifting") | |
crime_detected_as_result_of_police_action <- | |
c( | |
"Illegal possession of firearms and ammunition", | |
"Drug-related crime", | |
"Driving under the influence of alcohol or drugs", | |
"Sexual Offences detected as a result of police action" | |
) | |
x17_Community_reported_serious_crimes <- | |
c( | |
contact_crimes, | |
contact_related_crimes, | |
property_related_crimes, | |
other_serious_crimes | |
) | |
#----------------------------------------------------------# | |
#Remove hard coded category aggregates from the data | |
crime <- crime_stats2020 %>% | |
filter(str_detect(crime_category, "Contact crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Sexual offenses", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Contact-related crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Property-related crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "Other serious crimes", negate = TRUE)) %>% | |
filter(str_detect(crime_category, "17 Community Reported Serious Crimes", negate = TRUE)) %>% | |
filter(str_detect( | |
crime_category, | |
"Crime detected as a result of police action", | |
negate = TRUE | |
)) | |
## Make "TRIO crimes" entries consistent | |
crime$crime_category <- | |
str_replace(crime$crime_category, "Trio crimes", "TRIO crimes") | |
# categorise the data, add marker to ensure that is is clear that | |
# "some_aggravated_robbery_sub" and "TRIO crimes" should not be aggregated in totals. | |
crime <- crime %>% | |
mutate( | |
contact_crime = (crime_category %in% contact_crimes), | |
sexual_offenses = (crime_category %in% sexual_offence), | |
contact_relate_crime = (crime_category %in% contact_related_crimes), | |
prop_related_crime = (crime_category %in% property_related_crimes), | |
other_serious_crime = (crime_category %in% other_serious_crimes), | |
crime_detected_as_result_police_action = ( | |
crime_category %in% crime_detected_as_result_of_police_action | |
), | |
x17_community_reported_crime = (crime_category %in% x17_Community_reported_serious_crimes), | |
some_aggravated_robbery_sub = (crime_category %in% some_aggravated_robbery), | |
include_in_totals = ( | |
some_aggravated_robbery_sub == FALSE & | |
crime_category != "TRIO crimes" | |
) | |
) | |
##save a wide version as an .rds and a .csv | |
saveRDS(crime, paste0("crimestat_clean_2020_wide", Sys.Date(), ".rds")) | |
write_csv(crime, paste0("crimestat_clean_2020_wide", Sys.Date() , ".csv")) | |
##Pivot the data for a long version (allows analysis by year) | |
crime2 <- | |
gather( | |
crime, | |
key = "year", | |
value = "count_crimes_reported" , | |
c( | |
"x2008_2009", | |
"x2009_2010", | |
"x2010_2011", | |
"x2011_2012", | |
"x2012_2013", | |
"x2013_2014", | |
"x2014_2015", | |
"x2015_2016", | |
"x2016_2017", | |
"x2017_2018", | |
"x2018_2019", | |
"x2019_2020_21", | |
"x2019_2020_24") | |
) | |
##reorder the columns | |
crime2 <- crime2[, c(22, 1:9, 23, 10:21)] | |
## Save the long version as .rds and .csv | |
saveRDS(crime2, | |
paste0("crimestat_clean_2020_long", Sys.Date(), ".rds")) | |
write_csv(crime2, | |
paste0("crimestat_clean_2020_long", Sys.Date() , ".csv")) | |
#-------------------------------------------------------# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment