library(tidyverse)
set.seed(1234)
# Build a stand-in for the real dataset: 50 rows, each with an id and a
# randomly drawn V4529 label
possible_V4529s <- c(
  "Completed rape",
  "Attempted rape",
  "Sex aslt w m aslt",
  "Sex aslt w s aslt",
  "At rob inj m asl",
  "Rob w inj maslt",
  "Simp aslt w inj",
  "At mtr veh theft"
)
# sample() draws 50 labels at random; with replace = TRUE a label that has
# already been drawn stays available, so labels can repeat
your_real_data <- tibble(
  id = seq_len(50),
  V4529 = sample(x = possible_V4529s, size = 50, replace = TRUE)
)
head(your_real_data, n = 6)
#> # A tibble: 6 x 2
#> id V4529
#> <int> <chr>
#> 1 1 Sex aslt w s aslt
#> 2 2 At mtr veh theft
#> 3 3 Attempted rape
#> 4 4 Rob w inj maslt
#> 5 5 At rob inj m asl
#> 6 6 Sex aslt w s aslt
# Lookup table mapping each V4529 label to its numeric newoff code. It is typed
# in by hand here with tribble() (one row per label); the same table could also
# be kept in a CSV file and loaded with read_csv(), or built with tibble().
lookup_v4529 <- tribble(
  ~V4529,              ~newoff,
  "Completed rape",    1,
  "Attempted rape",    1,
  "Sex aslt w m aslt", 1,
  "Sex aslt w s aslt", 1,
  "At rob inj m asl",  2,
  "Rob w inj maslt",   2,
  "Simp aslt w inj",   4,
  "At mtr veh theft",  7
)
lookup_v4529
#> # A tibble: 8 x 2
#> V4529 newoff
#> <chr> <dbl>
#> 1 Completed rape 1
#> 2 Attempted rape 1
#> 3 Sex aslt w m aslt 1
#> 4 Sex aslt w s aslt 1
#> 5 At rob inj m asl 2
#> 6 Rob w inj maslt 2
#> 7 Simp aslt w inj 4
#> 8 At mtr veh theft 7
# Add the lookup table to the dataset. Both datasets have a column named
# "V4529", so specify that as the common column. left_join() keeps every row of
# your_real_data; a V4529 label with no row in lookup_v4529 does not raise an
# error, it just gets newoff = NA.
your_real_data_cleaner <- your_real_data %>%
  left_join(lookup_v4529, by = "V4529")
# Surface labels that are missing from the lookup table instead of letting the
# resulting NAs slip through unnoticed. anti_join() returns the rows of
# your_real_data whose V4529 has no match in lookup_v4529.
unmatched_V4529s <- your_real_data %>%
  anti_join(lookup_v4529, by = "V4529") %>%
  distinct(V4529) %>%
  pull(V4529)
if (length(unmatched_V4529s) > 0) {
  warning(
    "No newoff code in lookup_v4529 for: ",
    paste(unmatched_V4529s, collapse = ", "),
    call. = FALSE
  )
}
# It worked!
head(your_real_data_cleaner)
#> # A tibble: 6 x 3
#> id V4529 newoff
#> <int> <chr> <dbl>
#> 1 1 Sex aslt w s aslt 1
#> 2 2 At mtr veh theft 7
#> 3 3 Attempted rape 1
#> 4 4 Rob w inj maslt 2
#> 5 5 At rob inj m asl 2
#> 6 6 Sex aslt w s aslt 1
Created
October 19, 2020 16:32
-
-
Save andrewheiss/4e62bc4dbba04acc9b157f85944a076e to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment