Skip to content

Instantly share code, notes, and snippets.

@Arf9999
Last active March 31, 2021 08:04
Show Gist options
  • Save Arf9999/5fccfee4497fc320a51a62df3531e4aa to your computer and use it in GitHub Desktop.
Save Arf9999/5fccfee4497fc320a51a62df3531e4aa to your computer and use it in GitHub Desktop.
library (tidyverse)
library(readxl)
library(httr)
library(janitor)
##Download crime stats from SAPS site
url1 <-
"https://www.saps.gov.za/services/Crime-Statistics%202019_2020.xlsx" #URL of the SAPS excel file
GET(url1, write_disk(temp <-
tempfile(fileext = ".xlsx"))) # save the xlsx file to disk temporarily
crime_stats2020 <-
read_excel(temp, sheet = "station data 2020") # the data is contained on this "Station data" sheet
crime_stats2020 <-
janitor::clean_names(crime_stats2020, case = "snake") #remove spaces and upper case from column titles.
###--------Define crime categories and sub categories ----------------
contact_crimes <- c(
"Murder",
"Sexual Offences",
"Attempted murder",
"Assault with the intent to inflict grievous bodily harm",
"Common assault",
"Common robbery",
"Robbery with aggravating circumstances"
)
sexual_offence <- c("Rape",
"Sexual Assault",
"Attempted Sexual Offences",
"Contact Sexual Offences")
some_aggravated_robbery <- c(
"Carjacking",
"Robbery at residential premises",
"Robbery at non-residential premises",
"Robbery of cash in transit",
"Bank robbery",
"Truck hijacking"
)
contact_related_crimes <- c("Arson",
"Malicious damage to property")
property_related_crimes <- c(
"Burglary at non-residential premises",
"Burglary at residential premises",
"Theft of motor vehicle and motorcycle",
"Theft out of or from motor vehicle",
"Stock-theft"
)
other_serious_crimes <- c("All theft not mentioned elsewhere",
"Commercial crime",
"Shoplifting")
crime_detected_as_result_of_police_action <-
c(
"Illegal possession of firearms and ammunition",
"Drug-related crime",
"Driving under the influence of alcohol or drugs",
"Sexual Offences detected as a result of police action"
)
x17_Community_reported_serious_crimes <-
c(
contact_crimes,
contact_related_crimes,
property_related_crimes,
other_serious_crimes
)
#----------------------------------------------------------#
#Remove hard coded category aggregates from the data
crime <- crime_stats2020 %>%
filter(str_detect(crime_category, "Contact crimes", negate = TRUE)) %>%
filter(str_detect(crime_category, "Sexual offenses", negate = TRUE)) %>%
filter(str_detect(crime_category, "Contact-related crimes", negate = TRUE)) %>%
filter(str_detect(crime_category, "Property-related crimes", negate = TRUE)) %>%
filter(str_detect(crime_category, "Other serious crimes", negate = TRUE)) %>%
filter(str_detect(crime_category, "17 Community Reported Serious Crimes", negate = TRUE)) %>%
filter(str_detect(
crime_category,
"Crime detected as a result of police action",
negate = TRUE
))
## Make "TRIO crimes" entries consistent
crime$crime_category <-
str_replace(crime$crime_category, "Trio crimes", "TRIO crimes")
# categorise the data, add marker to ensure that is is clear that
# "some_aggravated_robbery_sub" and "TRIO crimes" should not be aggregated in totals.
crime <- crime %>%
mutate(
contact_crime = (crime_category %in% contact_crimes),
sexual_offenses = (crime_category %in% sexual_offence),
contact_relate_crime = (crime_category %in% contact_related_crimes),
prop_related_crime = (crime_category %in% property_related_crimes),
other_serious_crime = (crime_category %in% other_serious_crimes),
crime_detected_as_result_police_action = (
crime_category %in% crime_detected_as_result_of_police_action
),
x17_community_reported_crime = (crime_category %in% x17_Community_reported_serious_crimes),
some_aggravated_robbery_sub = (crime_category %in% some_aggravated_robbery),
include_in_totals = (
some_aggravated_robbery_sub == FALSE &
crime_category != "TRIO crimes"
)
)
##save a wide version as an .rds and a .csv
saveRDS(crime, paste0("crimestat_clean_2020_wide", Sys.Date(), ".rds"))
write_csv(crime, paste0("crimestat_clean_2020_wide", Sys.Date() , ".csv"))
##Pivot the data for a long version (allows analysis by year)
crime2 <-
gather(
crime,
key = "year",
value = "count_crimes_reported" ,
c(
"x2008_2009",
"x2009_2010",
"x2010_2011",
"x2011_2012",
"x2012_2013",
"x2013_2014",
"x2014_2015",
"x2015_2016",
"x2016_2017",
"x2017_2018",
"x2018_2019",
"x2019_2020_21",
"x2019_2020_24")
)
##reorder the columns
crime2 <- crime2[, c(22, 1:9, 23, 10:21)]
## Save the long version as .rds and .csv
saveRDS(crime2,
paste0("crimestat_clean_2020_long", Sys.Date(), ".rds"))
write_csv(crime2,
paste0("crimestat_clean_2020_long", Sys.Date() , ".csv"))
#-------------------------------------------------------#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment