Skip to content

Instantly share code, notes, and snippets.

@abmathewks
Created October 8, 2021 16:20
Show Gist options
  • Save abmathewks/6e0b66456e9531c91a91454986cea0fb to your computer and use it in GitHub Desktop.
Save abmathewks/6e0b66456e9531c91a91454986cea0fb to your computer and use it in GitHub Desktop.
#' Fill calendar gaps in a daily time series
#'
#' Builds the complete day-by-day calendar between the earliest and latest
#' value of `DATE_COLUMN`. When some of those days are absent from the input,
#' the input is left-joined onto the calendar so that every day is present.
#' Missing values of `TARGET_COLUMN` (including those on the inserted days)
#' are then replaced with `FILL_VALUE`. Values of `OTHER_COLUMNS` on inserted
#' days are left as `NA`.
#'
#' The input table is not modified; the result is a new data.table.
#'
#' @param AGG_TS_DATA A data.table holding the series. The default expression
#'   subsets a `FINAL_DATA` object, which must be visible from the function's
#'   enclosing environment when the default is used.
#' @param DATE_COLUMN Name of the column holding the dates; it must inherit
#'   from class `Date` and contain no missing values.
#' @param TARGET_COLUMN Name of the single column whose missing values are
#'   replaced with `FILL_VALUE`.
#' @param OTHER_COLUMNS Names of additional columns to carry into the output.
#' @param DEBUG If `TRUE`, progress messages are emitted via `message()`.
#' @param FILL_VALUE Single value written wherever `TARGET_COLUMN` is `NA`.
#'
#' @return A list with one element, `FINAL_OUTPUT`: a data.table with the
#'   columns `DATE_COLUMN`, `TARGET_COLUMN` and `OTHER_COLUMNS`. When gaps
#'   were filled the rows are ordered by date (a consequence of the merge);
#'   otherwise they keep the input order.
AddMissingDates <- function(
    AGG_TS_DATA = FINAL_DATA[data_type == "TRAIN"],
    DATE_COLUMN = "date_ymd",
    TARGET_COLUMN = "visits",
    OTHER_COLUMNS = c("revenue_segment", "line", "serviceline_segment",
                      "finance_group", "finance_sub_group"),
    DEBUG = TRUE,
    FILL_VALUE = 0.1) {
  if (DEBUG) message("AddMissingDates: Function Initialized \n")

  # ---- Input validation ----
  if (!is.data.table(AGG_TS_DATA)) {
    stop("AddMissingDates: The input data is not a data table \n")
  }

  observed_dates <- AGG_TS_DATA[[DATE_COLUMN]]
  # inherits() yields a single TRUE/FALSE even for multi-class vectors, whereas
  # comparing class() directly can produce a length > 1 `if` condition.
  if (!inherits(observed_dates, "Date")) {
    stop("AddMissingDates: The date column is not saved in date format \n")
  }

  if (!is.character(TARGET_COLUMN) || length(TARGET_COLUMN) != 1L) {
    stop("AddMissingDates: TARGET_COLUMN must be a single column name \n")
  }
  if (length(FILL_VALUE) != 1L) {
    stop("AddMissingDates: FILL_VALUE must be a single value \n")
  }

  ALL_FEATURES_TO_FILL <- unique(c(TARGET_COLUMN, OTHER_COLUMNS))
  if (!all(ALL_FEATURES_TO_FILL %in% colnames(AGG_TS_DATA))) {
    stop(
      "AddMissingDates: One or more input columns are missing from the dataset"
    )
  }

  # min()/max() of an empty or NA-containing date vector cannot define a
  # calendar, so fail here with an explicit message.
  if (length(observed_dates) == 0L || anyNA(observed_dates)) {
    stop(
      "AddMissingDates: The date column is empty or contains missing values \n"
    )
  }

  # ---- Detect gaps ----
  # The full calendar is computed once and reused for both the gap check and
  # the join below.
  full_calendar <- seq(min(observed_dates), max(observed_dates), by = "day")
  has_gaps <- uniqueN(observed_dates) != length(full_calendar)

  # ---- Insert missing days ----
  if (has_gaps) {
    calendar <- data.table(full_calendar)
    setnames(calendar, DATE_COLUMN)
    filled <- merge.data.table(calendar, AGG_TS_DATA,
                               by = DATE_COLUMN,
                               all.x = TRUE)
  } else {
    filled <- AGG_TS_DATA
  }

  keep_columns <- unique(c(DATE_COLUMN, ALL_FEATURES_TO_FILL))
  result <- filled[, keep_columns, with = FALSE]

  # ---- Fill the target ----
  # The replacement is done on a plain vector and the whole column is then
  # swapped in, so the caller's table is never altered by reference.
  target_values <- result[[TARGET_COLUMN]]
  target_values[is.na(target_values)] <- FILL_VALUE
  set(result, j = TARGET_COLUMN, value = target_values)

  if (DEBUG) message("AddMissingDates: Function Completed \n")

  list(FINAL_OUTPUT = result)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment