Skip to content

Instantly share code, notes, and snippets.

@BroVic
Created June 17, 2021 19:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BroVic/783c0d4943b18ed8b3c17e32b9278ade to your computer and use it in GitHub Desktop.
Save BroVic/783c0d4943b18ed8b3c17e32b9278ade to your computer and use it in GitHub Desktop.
library(readxl)
library(tidyr)
# Location of the workbook on the author's machine; point this at your own copy
path <-
  "~/Documents/5-Personal/Data/DATA DHIS MONTHLY RI TEMPLATE 5.xlsx"
riData <- read_xlsx(path = path)

# Discard the 2nd and 3rd imported rows, then promote the 1st row to be the
# column names of the table
riData <- riData[-(2:3), ]
hdr <- unlist(riData[1, ], use.names = FALSE)
names(riData) <- hdr
riData

# The first row now duplicates the column names, so remove it from the data
riData <- riData[-1, ]
riData

# Retain only the rows that hold at least one non-missing value
riData <- riData[rowSums(!is.na(riData)) > 0, ]
# Check the storage type of each column. vapply() is used instead of sapply()
# so that the result is always an unnamed character vector, whatever the
# number of columns in the table.
vapply(riData, typeof, character(1), USE.NAMES = FALSE)
# All our columns are of type 'character', in spite of the fact that we
# obviously have numerical data in the table. This happened because of the
# way the data were imported ab initio - strings that were meant to be part
# of the header were taken as part of the data. This led to the implicit
# conversion of numbers into strings. To remedy this, we could change every
# column with 'numbers' to numerical type, or we can just do that when we are
# working with a particular column, i.e. on a variable-by-variable basis.

# Check the column names again
colnames(riData)
# Find the positions of the columns whose names contain "DHIS2 PENTA"
# (see ?regex and ?grep)
pentacols <- grep(pattern = "DHIS2 PENTA", x = names(riData))

# Those columns currently hold numbers stored as text; coerce each of them
# to numeric, one column at a time
for (i in pentacols) {
  riData[[i]] <- as.numeric(riData[[i]])
}

# Keep the 2nd and 3rd columns together with the PENTA columns
selectcols <- c(2:3, pentacols)
pentaData <- riData[selectcols]
# Fill in the empty WARD cells (tidyr::fill() carries the previous non-missing
# value downwards by default)
pentaData <- fill(pentaData, WARD) # from tidyr package

# Get the sum of all the PENTAs for each ward.
# The value columns are everything after the first two columns of the table.
# Their positions are derived from the table itself rather than hard-coded,
# so the code keeps working when the number of matched PENTA columns is not
# exactly three.
groupingVar <- factor(pentaData$WARD)
valueCols <- seq_len(ncol(pentaData))[-(1:2)]
if (length(valueCols) == 0) {
  stop("'pentaData' has no PENTA columns to aggregate", call. = FALSE)
}
# na.rm = TRUE: a blank cell should not turn a ward's whole total into NA
pentaData <- aggregate(
  pentaData[, valueCols, drop = FALSE],
  by = list(WARD = groupingVar),
  FUN = sum,
  na.rm = TRUE
)

# Numeric matrix of the per-ward totals (all columns except WARD), with the
# ward names as row names
mat <- data.matrix(pentaData[, -1, drop = FALSE])
rownames(mat) <- pentaData$WARD
# Confirm that 'mat' is a matrix and not a data frame
class(mat)

# Transpose so that each ward becomes a column; with beside = TRUE, barplot()
# then draws one group of side-by-side bars per ward
t.mat <- t(mat)
hue <- c("blue", "red", "green")
barplot(
  height = t.mat,
  col = hue,
  beside = TRUE,
  ylab = "Number of Clients",
  xlab = "Ward",
  main = "Distribution of PENTA utilization by Ward"
)

# One legend entry per row of the transposed matrix, using the bar colours
legend(x = "topleft", fill = hue, legend = rownames(t.mat))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment