Skip to content

Instantly share code, notes, and snippets.

@jmcastagnetto
Created February 6, 2015 15:05
Show Gist options
  • Save jmcastagnetto/7bf43be8aebd0ab6c1bc to your computer and use it in GitHub Desktop.
Save jmcastagnetto/7bf43be8aebd0ab6c1bc to your computer and use it in GitHub Desktop.
Code to clean up and read WHO immunization monitoring tables into R data frames.
require(XML)
require(reshape2)
# Node content cleanup.
#
# Cell-level callback passed to readHTMLTable() as `elFun`: extracts the text
# of one node with xmlValue() and removes apostrophes and non-breaking spaces
# from it.
#
# Args:
#   node: an XML node, as accepted by xmlValue().
# Returns:
#   A character vector with the cleaned-up text of the node.
cleanup <- function(node) {
  # gsub() would coerce its input with as.character() anyway, so a single
  # explicit coercion replaces a separate character/factor branch.
  text <- as.character(xmlValue(node))
  # Apostrophes are removed outright (presumably thousands separators in the
  # WHO tables -- TODO confirm against the live page).
  text <- gsub("'", "", text, fixed = TRUE)
  # Remove U+00A0 (non-breaking space) as a whole character. Matching the lone
  # byte 0xA0 instead would also eat the trailing byte of other UTF-8
  # characters (e.g. "a-grave" is C3 A0) and leave a dangling C2 byte behind.
  gsub("\u00a0", "", text, fixed = TRUE)
}
# Read a WHO incidence page, strip the extraneous rows and return a tidy
# (long format) data set.
#
# Args:
#   url: location of the HTML page, passed as `doc` to readHTMLTable().
# Returns:
#   A data frame with the columns `Diseases`, `year` (numeric) and `cases`
#   (numeric), one row per disease/year combination.
# Raises:
#   An error when the page does not contain a usable second table.
who2df <- function(url) {
  tbs <- readHTMLTable(doc = url, elFun = cleanup, trim = TRUE,
                       stringsAsFactors = FALSE)
  if (length(tbs) < 2 || !is.data.frame(tbs[[2]])) {
    stop("expected the data in the second HTML table of ", url,
         ", but only ", length(tbs), " table(s) could be read",
         call. = FALSE)
  }

  # Layout assumed from the page: the data live in the second table, its
  # first two rows are extraneous, the third row carries the column headers
  # and the last row is a footer -- TODO confirm if the page layout changes.
  df <- tbs[[2]][-c(1, 2), , drop = FALSE]
  if (nrow(df) < 1) {
    stop("the second HTML table of ", url, " has no header row",
         call. = FALSE)
  }

  # Keep the header as a plain character vector rather than a one-row data
  # frame, so it can be used directly as column names.
  fields <- as.character(unlist(df[1, ], use.names = FALSE))
  df <- df[-1, , drop = FALSE]
  colnames(df) <- fields
  df <- df[-nrow(df), , drop = FALSE]

  # "_" marks a missing value; %in% never yields NA, so cells that are
  # already NA cannot break the replacement.
  df[] <- lapply(df, function(col) replace(col, col %in% "_", NA))

  # Every column except the first holds counts: convert them by position so
  # duplicated header names are handled as well.
  value_cols <- seq_along(fields)[-1]
  if (length(value_cols) > 0) {
    df[value_cols] <- lapply(df[value_cols], as.numeric)
  }

  # Wide (one column per year) -> long (one row per disease and year).
  df2 <- melt(df, id.vars = "Diseases", variable.name = "year",
              value.name = "cases")
  df2$year <- as.numeric(as.character(df2$year))
  df2
}
# Example usage: download the incidence tables for Peru (PER) and the
# United States (USA). The country code is appended to the base query URL.
url_who <- "http://apps.who.int/immunization_monitoring/globalsummary/incidences?c="

# Per-country page addresses
url_pe <- paste0(url_who, "PER")
url_us <- paste0(url_who, "USA")

# One tidy data frame per country
df_pe <- who2df(url = url_pe)
df_us <- who2df(url = url_us)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment