Skip to content

Instantly share code, notes, and snippets.

@Tadge-Analytics
Created December 1, 2021 22:51
Show Gist options
  • Save Tadge-Analytics/4ed92c5d40e31eab39d81a7e030c24e0 to your computer and use it in GitHub Desktop.
Save Tadge-Analytics/4ed92c5d40e31eab39d81a7e030c24e0 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(readxl)
###################################################################
import <-
read_excel("PD 2021 Wk 48 Input.xlsx", col_names = F) %>%
janitor::clean_names() %>%
mutate(row_id = row_number())
xx <-
import %>%
mutate(left_col_is_blank = is.na(x1)) %>%
group_by(left_col_is_blank) %>%
mutate(grouping_of_info_set = if_else(left_col_is_blank, 1+row_number(), NA_real_)) %>%
ungroup() %>%
fill(grouping_of_info_set) %>%
mutate(grouping_of_info_set = replace_na(grouping_of_info_set, 1)) %>%
filter(!left_col_is_blank) %>%
select(-left_col_is_blank) %>%
group_by(grouping_of_info_set) %>%
mutate(row_id_by_set = row_number()) %>%
select(-row_id) %>%
gather(key, value, -row_id_by_set, -grouping_of_info_set) %>%
arrange(grouping_of_info_set, key, row_id_by_set) %>%
mutate(location = if_else(key == "x1" & row_id_by_set == 1, value, NA_character_),
time_frame = if_else(key != "x1" & row_id_by_set == 1, value, NA_character_)) %>%
fill(location, time_frame) %>%
arrange(grouping_of_info_set, row_id_by_set, key) %>%
mutate(metric_name = if_else(key == "x1" & row_id_by_set != 1, value, NA_character_)) %>%
fill(metric_name) %>%
ungroup() %>%
filter(!is.na(time_frame), !is.na(metric_name)) %>%
mutate(time_frame = word(time_frame, -1, sep = " ") %>% as.numeric()) %>%
mutate(value = as.numeric(value),
metric_name_has_units = str_detect(metric_name, fixed("(")),
units_of_value = if_else(metric_name_has_units, str_extract(metric_name, "(?<=\\().+?(?=\\))"), NA_character_),
value_muiltiplier = case_when(units_of_value == "m" ~ 1E6,
units_of_value == "k" ~ 1E3,
TRUE ~ 1),
new_value = value * value_muiltiplier,
new_metric_name = if_else(metric_name_has_units, word(metric_name, 1, -2, sep = " "), metric_name)) %>%
select(location, time_frame, new_metric_name, new_value) %>%
spread(new_metric_name, new_value) %>%
janitor::clean_names()
# if you want to make it look a bit closer to the output they asked for
yy <-
xx %>%
gather(clean_measure_names, true_value, -location, -time_frame)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment