Last active
July 10, 2017 10:18
-
-
Save tts/e60a4001da5f858ada79b3575b1a3576 to your computer and use it in GitHub Desktop.
Transform Pure activities data exported as Excel to be imported into Power BI for reporting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# `dataset` holds the input data for this script | |
library(dplyr) | |
names(dataset) <- c("Type", "Unit", "Start", "End") | |
dataset$Unit <- iconv(dataset$Unit, from = "WINDOWS-1252", to = "UTF-8") | |
# We need to expand those rows where Start and End span more than one year, | |
# e.g. 2010-2012 into 2010, 2011, 2012 | |
to_expand <- dataset %>% | |
filter(Start != End & !is.null(End)) | |
not_to_expand <- dataset %>% | |
filter(Start == End | is.null(End)) | |
# Function modified from https://stackoverflow.com/a/38501938 | |
duplicate_rows <- function(type, unit, start, end) { | |
expanded_years <- seq(from=as.integer(start), to=as.integer(end), by=1) | |
repeated_rows <- data.frame("Type" = type, "Unit" = unit, "Start" = expanded_years, "End" = expanded_years) | |
repeated_rows | |
} | |
expanded_rows <- Map(f = duplicate_rows, to_expand$Type, to_expand$Unit, to_expand$Start, to_expand$End) | |
data_expanded_rows <- do.call(rbind, expanded_rows) | |
data_all <- rbind(not_to_expand, data_expanded_rows) | |
# Calculate counts per year, and filter years 2010-2017 | |
output <- data_all %>% | |
group_by(Type, Unit, Start) %>% | |
summarise(Count = n()) %>% | |
filter(as.double(Start), between(as.double(Start), 2010, 2017)) | |
output$Start <- as.character(output$Start) | |
remove(to_expand, not_to_expand, expanded_rows, data_expanded_rows, data_all) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment