Skip to content

Instantly share code, notes, and snippets.

@tts
Last active July 10, 2017 10:18
Show Gist options
  • Save tts/e60a4001da5f858ada79b3575b1a3576 to your computer and use it in GitHub Desktop.
Save tts/e60a4001da5f858ada79b3575b1a3576 to your computer and use it in GitHub Desktop.
Transform Pure activities data exported as Excel to be imported into Power BI for reporting
# `dataset` holds the input data for this script
library(dplyr)
names(dataset) <- c("Type", "Unit", "Start", "End")
dataset$Unit <- iconv(dataset$Unit, from = "WINDOWS-1252", to = "UTF-8")
# We need to expand those rows where Start and End span more than one year,
# e.g. 2010-2012 into 2010, 2011, 2012
to_expand <- dataset %>%
filter(Start != End & !is.null(End))
not_to_expand <- dataset %>%
filter(Start == End | is.null(End))
# Function modified from https://stackoverflow.com/a/38501938
duplicate_rows <- function(type, unit, start, end) {
expanded_years <- seq(from=as.integer(start), to=as.integer(end), by=1)
repeated_rows <- data.frame("Type" = type, "Unit" = unit, "Start" = expanded_years, "End" = expanded_years)
repeated_rows
}
expanded_rows <- Map(f = duplicate_rows, to_expand$Type, to_expand$Unit, to_expand$Start, to_expand$End)
data_expanded_rows <- do.call(rbind, expanded_rows)
data_all <- rbind(not_to_expand, data_expanded_rows)
# Calculate counts per year, and filter years 2010-2017
output <- data_all %>%
group_by(Type, Unit, Start) %>%
summarise(Count = n()) %>%
filter(as.double(Start), between(as.double(Start), 2010, 2017))
output$Start <- as.character(output$Start)
remove(to_expand, not_to_expand, expanded_rows, data_expanded_rows, data_all)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment