Created
June 17, 2021 19:17
-
-
Save BroVic/783c0d4943b18ed8b3c17e32b9278ade to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readxl)
library(tidyr)

# Location of the workbook on the author's machine -- point this at your copy.
path <-
  "~/Documents/5-Personal/Data/DATA DHIS MONTHLY RI TEMPLATE 5.xlsx"

# Import the sheet as-is.
riData <- read_xlsx(path)

# Discard the 2nd and 3rd imported rows (presumably left-over pieces of the
# spreadsheet's multi-row header -- confirm against the workbook).
riData <- riData[-(2:3), ]

# The first remaining row carries the real column titles: flatten it into a
# plain vector and install it as the column names.
hdr <- unname(unlist(riData[1, ]))
colnames(riData) <- hdr
riData

# That row has now served its purpose, so remove it from the data.
riData <- riData[-1, ]
riData

# Retain only the rows that hold at least one non-missing value.
riData <- riData[rowSums(!is.na(riData)) > 0, ]
# Check the type of each column. vapply() is used instead of sapply() so the
# result is always a character vector, whatever the number of columns.
unname(vapply(riData, typeof, character(1)))
# All our columns are of type 'character', in spite of the fact that we
# obviously have numerical data in the table. This happened because of the
# way the data were imported ab initio - strings that were meant to be part
# of the header were taken as part of the data. This led to the implicit
# conversion of numbers into strings. To remedy this, we could change every
# column with 'numbers' to numerical type, or we can just do that when we are
# working with a particular column, i.e. on a variable-by-variable basis.

# Check the column names again
colnames(riData)
# Get the positions of the PENTA columns for DHIS. The pattern is matched as a
# literal substring of the column names (see ?grep).
pentacols <- grep("DHIS2 PENTA", colnames(riData), fixed = TRUE)

# Fail early, with a clear message, if the sheet layout is not what we expect;
# otherwise the script only breaks later with an obscure subscript error.
if (length(pentacols) == 0) {
  stop("No column name contains 'DHIS2 PENTA'", call. = FALSE)
}

# Convert the relevant columns from character to numeric data, all at once.
# Cells that do not hold a number become NA (with a coercion warning).
riData[pentacols] <- lapply(riData[pentacols], as.numeric)

# Keep the 2nd and 3rd columns, combined with the PENTA columns.
selectcols <- c(2:3, pentacols)
pentaData <- riData[, selectcols]
# Fill in the empty WARD cells by carrying the last seen value downwards
# (fill() is from the tidyr package).
pentaData <- fill(pentaData, WARD)

# Get the sum of all the PENTAs for each ward. The first two columns of
# pentaData are the ones carried over from the sheet; every column after them
# is a PENTA column, so select them by exclusion rather than by the fixed
# positions 3:5 (which silently assumed exactly three PENTA columns).
# Note: sum() propagates NA, so a ward with any missing value gets an NA
# total; add `na.rm = TRUE` to the aggregate() call if blanks should be
# ignored instead.
groupingVar <- factor(pentaData$WARD)
pentaData <- aggregate(
  pentaData[, -(1:2)],
  by = list(WARD = groupingVar),
  FUN = sum
)
# One colour per plotted series.
hue <- c("blue", "red", "green")

# Turn columns 2 to 4 of the aggregated table into a numeric matrix and label
# each row with its ward.
mat <- data.matrix(pentaData[2:4])
rownames(mat) <- pentaData[["WARD"]]
class(mat) # This is a matrix, not a data frame

# barplot() draws one group of bars per matrix column, so transpose to get
# wards along the x-axis and one bar per series within each ward.
t.mat <- t(mat)

barplot(
  height = t.mat,
  beside = TRUE,
  col = hue,
  main = "Distribution of PENTA utilization by Ward",
  xlab = "Ward",
  ylab = "Number of Clients"
)

# Key the colours to the series names (the rows of the transposed matrix).
legend(x = "topleft", legend = rownames(t.mat), fill = hue)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment