Skip to content

Instantly share code, notes, and snippets.

@BroVic
Last active March 18, 2020 13:57
Show Gist options
  • Save BroVic/3c5014dd5e5e1b4723f7d75885d17203 to your computer and use it in GitHub Desktop.
Save BroVic/3c5014dd5e5e1b4723f7d75885d17203 to your computer and use it in GitHub Desktop.
A Makefile for cleaning and transforming data, generating HTML codebooks and building MS Word reports
## -- Makefile -- ##
##
## MIT License
##
## Copyright (c) 2019 Victor Ordu
##
## -------------- ##
# Directory layout. Every value ends in '/', so paths are formed by plain
# concatenation (no extra separator is inserted by the rules further down).
ROOTDIR  := ./

# Inputs and generated data
DWNDIR   := $(ROOTDIR)downloads/
DATADIR  := $(ROOTDIR)data/
QUANTDIR := $(DATADIR)quant/
QUALDIR  := $(DATADIR)qual/

# Sources (R scripts) and their sub-folders
SRCDIR   := $(ROOTDIR)src/
CLEANDIR := $(SRCDIR)clean/
CODESDIR := $(SRCDIR)coding/

# Generated documents (codebooks, reports)
OUTDIR   := $(ROOTDIR)doc/output/
# States and sectors; every per-state / per-sector file list and rule in this
# file is generated by iterating over these two lists.
all_states := \
    Akwa-Ibom \
    Ogun \
    Bauchi \
    Abia
all_sectors := \
    Health \
    Social \
    Judicial \
    Legal \
    Security \
    Temporary \
    Referral
# true_sectors := $(subst Referral,,$(all_sectors))

# R invocation. RFLAGS is defined here but not referenced by any recipe in
# this file.
RCMD          := Rscript.exe
RFLAGS        := --vanilla
# R expression prepended to the inline (-e) commands used by the recipes
REQ_RAAMP_PKG := library(raampGBV)

# Filepath strings: filename prefixes for reports and codebooks
REPRT_STR := findings_
CDBK_STR  := codebook_
# Codebook HTML files found in the current directory when the makefile is
# parsed; not referenced elsewhere in this file.
CDBK_PATTERN := $(wildcard codebook_*_*.html)

# R scripts (file names only; the rules prefix them with $(CLEANDIR))
TRANSFORM_R := 03_transform.R
MERGE_R     := 04_merge.R
REFDIR_R    := 0x_build_refdir.R
# Data
#
# quant-rds-list: expand to one .rds path per state/sector combination:
#     $(QUANTDIR)<state>/<prefix><state>_<sector>.rds
# where <prefix> is the first argument of $(call quant-rds-list,<prefix>).
quant-rds-list = $(foreach state,$(all_states),$(foreach sector,$(all_sectors),$(QUANTDIR)$(state)/$(1)$(state)_$(sector).rds))

# Initialize with downloaded REDCap files: two file types per state/sector
both_datafile_types := modifiedRawData__ metadata__
ALL_INIT_OUTPUT := $(foreach filetype,$(both_datafile_types),$(call quant-rds-list,$(filetype)))

## Labelled data
FNAME_LBL = fullyLabelledData__
ALL_LABELLED_RDS := $(call quant-rds-list,$(FNAME_LBL))

## Transformed data in serialization format
FNAME_TRANS := transformed_data__
# transform-rule: rule template, meant to be instantiated once per state with
# $(eval $(call transform-rule,<state>,<out-prefix>,<in-prefix>)).
#   $(1)  state name as it appears in directory and file names (e.g. Akwa-Ibom)
#   $(2)  filename prefix of the generated .rds files
#   $(3)  filename prefix of the prerequisite .rds files
# Targets are one $(2) file per sector; prerequisites are the transform script
# followed by one $(3) file per sector. '$$<' reaches the recipe as '$<', i.e.
# the script. The state is passed to the script with '-' replaced by a space
# and unquoted, so a hyphenated state arrives as two command-line arguments.
# The recipe line must start with a TAB.
# NOTE(review): several targets share this one recipe; make treats that as an
# independent rule per target, so the script may be run more than once in a
# parallel build.
define transform-rule
$(foreach sector,$(all_sectors),$(QUANTDIR)$(1)/$(2)$(1)_$(sector).rds): $(CLEANDIR)$(TRANSFORM_R) $(foreach sector,$(all_sectors),$(QUANTDIR)$(1)/$(3)$(1)_$(sector).rds)
	$(RCMD) $$< $(subst -, ,$(1))
endef
## Merged data - variables common to all sectors
FNAME_MRG = combined_data_common_quest_
# merge-rule: rule template, meant to be instantiated once per state with
# $(eval $(call merge-rule,<state>,<script>,<in-prefix>)).
#   $(1)  state name as it appears in directory and file names
#   $(2)  file name of the R script, looked up in $(CLEANDIR)
#   $(3)  filename prefix of the per-sector prerequisite .rds files
# Target: $(QUANTDIR)<state>/$(FNAME_MRG)<state>_.rds
# '$$<' reaches the recipe as '$<', i.e. the script. The state is passed to it
# with '-' replaced by a space (unquoted). The recipe line must start with a
# TAB.
define merge-rule
$(QUANTDIR)$(1)/$(FNAME_MRG)$(1)_.rds: $(CLEANDIR)$(2) $(foreach sector,$(all_sectors),$(QUANTDIR)$(1)/$(3)$(1)_$(sector).rds)
	$(RCMD) $$< $(subst -, ,$(1))
endef
## Reference directory
FNAME_REFDIR = built_refdir_data__
CSV_NAME = Referral_Directory_
# refdir-rule: rule template, meant to be instantiated once per state with
# $(eval $(call refdir-rule,<state>)).
#   $(1)  state name as it appears in directory and file names
# Targets: the state's $(FNAME_REFDIR)<state>_.rds and $(CSV_NAME)<state>.csv.
# Prerequisites: the refdir script followed by the state's transformed .rds
# files, one per sector. '$$<' reaches the recipe as '$<', i.e. the script.
# The recipe line must start with a TAB.
# NOTE(review): both targets share this one recipe; make treats that as two
# independent rules, so the script may be run twice in a parallel build.
define refdir-rule
$(QUANTDIR)$(1)/$(FNAME_REFDIR)$(1)_.rds $(QUANTDIR)$(1)/$(CSV_NAME)$(1).csv: $(CLEANDIR)$(REFDIR_R) $(foreach sector,$(all_sectors),$(QUANTDIR)$(1)/$(FNAME_TRANS)$(1)_$(sector).rds)
	$(RCMD) $$< $(subst -, ,$(1))
endef
# For the codebooks
# One HTML codebook per state/sector: $(OUTDIR)<state>/$(CDBK_STR)<state>_<sector>.html
ALL_CDBKS := $(foreach state,$(all_states),$(foreach sector,$(all_sectors),$(OUTDIR)$(state)/$(CDBK_STR)$(state)_$(sector).html))
# codebook-rule: rule template, meant to be instantiated per state/sector with
# $(eval $(call codebook-rule,<state>,<sector>)).
#   $(1)  state name as it appears in directory and file names
#   $(2)  sector name
# The codebook depends on the matching labelled .rds file. build_codebook()
# receives the state (with '-' replaced by a space), the sector and the
# directory part of the target.
# '$$(@D)' is deliberately escaped: with a single '$' it would be expanded
# during $(call), where no target exists yet, and the R call would receive an
# empty output directory. The recipe line must start with a TAB.
define codebook-rule
$(OUTDIR)$(1)/$(CDBK_STR)$(1)_$(2).html: $(QUANTDIR)$(1)/$(FNAME_LBL)$(1)_$(2).rds
	$(RCMD) -e "$(REQ_RAAMP_PKG); build_codebook('$(subst -, ,$(1))', '$(2)', '$$(@D)')"
endef
# For the reports
# One MS Word report per state: $(OUTDIR)<state>/$(REPRT_STR)<state>.docx
# The file name keeps the hyphenated state name so that each entry is a single
# word and matches the target generated by report-rule below. Replacing '-'
# with a space here would split a hyphenated state into two prerequisites
# that no rule can build.
# NOTE(review): this assumes build_report() writes the hyphenated file name -
# confirm against the R package.
ALL_REPORTS := $(foreach state,$(all_states),$(OUTDIR)$(state)/$(REPRT_STR)$(state).docx)
# report-rule: rule template, meant to be instantiated once per state with
# $(eval $(call report-rule,<state>)).
#   $(1)  state name as it appears in directory and file names
# The report depends on $(SRCDIR)rep/$(REPRT_STR)<state>.Rmd. build_report()
# receives the directory part of the target ('$$(@D)' is escaped so it is
# expanded when the recipe runs) and the state with '-' replaced by a space.
# The recipe line must start with a TAB.
define report-rule
$(OUTDIR)$(1)/$(REPRT_STR)$(1).docx: $(SRCDIR)rep/$(REPRT_STR)$(1).Rmd
	$(RCMD) -e "$(REQ_RAAMP_PKG); build_report('$$(@D)', '$(subst -, ,$(1))')"
endef
#####################################################################
#
# The Rules
#
# Default goal: build the data, then the codebooks and the reports.
# Declared phony so that a file named 'all' cannot mask it. (A special target
# whose name starts with '.' is never chosen as the default goal, so 'all'
# remains the default.)
.PHONY: all
all: data codebooks reports
# Build every HTML codebook. The special target name is case-sensitive and
# must list the targets it applies to.
.PHONY: codebooks
codebooks: $(ALL_CDBKS)
# Generate one codebook rule per state/sector combination.
$(foreach state,$(all_states),$(foreach sector,$(all_sectors),$(eval $(call codebook-rule,$(state),$(sector)))))
# Build every report listed in ALL_REPORTS. The special target name is
# case-sensitive and must list the targets it applies to.
.PHONY: reports
reports: $(ALL_REPORTS)
# Generate one report rule per state.
$(foreach state,$(all_states),$(eval $(call report-rule,$(state))))
# Umbrella target for the data-processing steps. The special target name is
# case-sensitive and must list the targets it applies to.
.PHONY: data
data: transform merge refdir
# Build the reference-directory outputs (.rds and .csv) for every state. The
# special target name is case-sensitive and must list the targets it applies
# to.
.PHONY: refdir
refdir: $(foreach state,$(all_states),$(QUANTDIR)$(state)/$(FNAME_REFDIR)$(state)_.rds) \
$(foreach state,$(all_states),$(QUANTDIR)$(state)/$(CSV_NAME)$(state).csv)
# Generate one reference-directory rule per state.
$(foreach state,$(all_states),$(eval $(call refdir-rule,$(state))))
# Build the merged (common-question) data file for every state. The special
# target name is case-sensitive and must list the targets it applies to.
.PHONY: merge
merge: $(foreach state,$(all_states),$(QUANTDIR)$(state)/$(FNAME_MRG)$(state)_.rds)
# Generate one merge rule per state; inputs are the transformed .rds files.
$(foreach state,$(all_states),$(eval $(call merge-rule,$(state),$(MERGE_R),$(FNAME_TRANS))))
# Build the transformed .rds file of every state/sector combination. The
# special target name is case-sensitive and must list the targets it applies
# to.
.PHONY: transform
transform: $(foreach state,$(all_states),$(foreach sector,$(all_sectors),$(QUANTDIR)$(state)/$(FNAME_TRANS)$(state)_$(sector).rds))
# Generate one transform rule per state: labelled data in, transformed data out.
$(foreach state,$(all_states),$(eval $(call transform-rule,$(state),$(FNAME_TRANS),$(FNAME_LBL))))
# The initial 2 steps in the process generate all the intermediate data, each via a single R script.
# Thus all the targets are recreated together, once there's a change in any of the dependencies.
# TODO: Uncouple this to build on a state-by-state basis
#
# The special target name is case-sensitive and must list the targets it
# applies to.
.PHONY: labels
labels: $(ALL_LABELLED_RDS)
# Every labelled .rds file depends on the labelling script and on all of the
# initial output files; '$<' is the script. The recipe line must start with a
# TAB.
$(ALL_LABELLED_RDS): $(CLEANDIR)02_values_labels.R $(ALL_INIT_OUTPUT)
	$(RCMD) $<
# Create the initial .rds files from the downloaded files. The special target
# name is case-sensitive and must list the targets it applies to.
.PHONY: init
init: $(ALL_INIT_OUTPUT)
# Prerequisites are the script plus every .csv and .r file one directory level
# below $(DWNDIR); make expands the wildcards itself. '$<' is the script. The
# recipe line must start with a TAB.
$(ALL_INIT_OUTPUT): $(CLEANDIR)01_vars_labels.R $(DWNDIR)*/*.csv $(DWNDIR)*/*.r
	$(RCMD) $<
# Remove generated output. Declared phony so that a file named 'clean' cannot
# stop the recipe from running. Recipe lines must start with a TAB.
.PHONY: clean
clean:
	rm -f $(foreach state,$(all_states),$(OUTDIR)$(state)/*.*)
	rm -rf $(QUANTDIR)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment