Skip to content

Instantly share code, notes, and snippets.

@robsalasco
Last active January 30, 2019 19:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robsalasco/a3bb807dac617a9fad663184793bf888 to your computer and use it in GitHub Desktop.
Save robsalasco/a3bb807dac617a9fad663184793bf888 to your computer and use it in GitHub Desktop.
# Proof of concept: extract data from the REDATAM web interface (Census 2017) directly into R
library(httr)
library(rvest)
# ---- Fetch the result tables from the REDATAM web interface ----

# Request headers sent with both calls below. They present the request as a
# desktop Chrome XMLHttpRequest coming from the REDATAM AreaList page.
headers <- add_headers(
  "User-Agent" = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36",
  "Referer" = "https://redatam-ine.ine.cl/redbin/RpWebStats.exe/AreaList?BASE=CENSO_2017&ITEM=AREAPOB&lang=esp",
  "Host" = "redatam-ine.ine.cl",
  "X-Requested-With" = "XMLHttpRequest"
)

# Pre-encoded form payload, sent verbatim as the POST body. It requests
# VARIABLE=PERSONA.P08 with OUTPUT=COMUNA in HTML format.
body <- "MAIN=WebServerMain.inl&BASE=CENSO_2017&LANG=esp&CODIGO=XXUSUARIOXX&ITEM=AREAPOB&MODE=RUN&inputTitle=&VARIABLE=PERSONA.P08&OUTPUT=COMUNA&SELECTION=ALL&INLINESELECTION=&FORMAT=HTML&TOTCOL=on&UNIVERSE=&FILTER=&TEXT_FILTER=&Submit=Ejecutar"

# Step 1: submit the form. Fail fast on an HTTP error instead of letting an
# error page reach the HTML parser.
data <- POST(
  "https://redatam-ine.ine.cl/redbin/RpWebStats.exe/AreaList?",
  body = body,
  headers
)
stop_for_status(data, task = "submit the REDATAM query")

# Step 2: the response points at the actual result page through an <iframe>;
# collect the `src` attribute(s) and make sure there is one to follow.
tmp_doc <- read_html(content(data, "text")) %>%
  html_nodes("iframe") %>%
  html_attr("src")
tmp_doc <- tmp_doc[!is.na(tmp_doc)]
if (length(tmp_doc) == 0) {
  stop(
    "No <iframe> with a `src` attribute found in the REDATAM response.",
    call. = FALSE
  )
}
# GET() needs a single URL; if several iframes are present, follow the first.
tmp_doc <- tmp_doc[[1]]

# Step 3: download the result page and parse every <table> it contains.
# `data_final` is a list with one element per table at this point.
data_census <- GET(tmp_doc, headers)
stop_for_status(data_census, task = "download the REDATAM result page")

data_final <- read_html(content(data_census, "text")) %>%
  html_nodes(xpath = "//*/table") %>%
  html_table()
# ---- Clean the scraped REDATAM table ----

# `data_final` arrives as the list of parsed tables; only the first is used.
if (length(data_final) == 0) {
  stop("No <table> found in the REDATAM result page.", call. = FALSE)
}
data_final <- data_final[[1]]

# Skip the first 9 rows (presumably page title/preamble -- confirm against the
# live page). A logical mask is used because `10:nrow(x)` counts backwards
# when the table has fewer than 10 rows.
data_final <- data_final[seq_len(nrow(data_final)) > 9, , drop = FALSE]
data_final <- as.data.frame(data_final, stringsAsFactors = FALSE)

# Drop the footer/attribution rows, identified by the text in column X2.
# `%in%` never yields NA, so rows with a missing X2 are kept as-is instead of
# turning into phantom all-NA rows.
footer_notes <- c(
  "Fuente: Censo 2017",
  "Procesado con Redatam WebServer",
  "2017. CELADE/CEPAL, Naciones Unidas"
)
if ("X2" %in% names(data_final)) {
  is_footer <- data_final[["X2"]] %in% footer_notes
  data_final <- data_final[!is_footer, , drop = FALSE]
}

# Drop columns that are blank in every row (NA is treated as blank).
blank_cols <- vapply(
  data_final,
  function(x) all(is.na(x) | x == ""),
  logical(1)
)
data_final <- data_final[!blank_cols]

if (nrow(data_final) == 0 || ncol(data_final) == 0) {
  stop("The REDATAM table is empty after removing padding.", call. = FALSE)
}

# Promote the first remaining row to column names, then remove it.
# `drop = FALSE` keeps a data frame even if only one column is left.
colnames(data_final) <- as.character(unlist(data_final[1, ]))
data_final <- data_final[-1, , drop = FALSE]

# Strip all whitespace from the third column onward (presumably digit-group
# separators in the counts -- confirm). `setdiff()` yields an empty set when
# there are fewer than 3 columns, unlike `3:ncol(x)`.
value_cols <- setdiff(seq_len(ncol(data_final)), 1:2)
for (j in value_cols) {
  data_final[[j]] <- gsub("\\s", "", data_final[[j]])
}

# Drop rows that still contain a blank (or NA) cell.
has_blank <- rowSums(is.na(data_final) | data_final == "") > 0
data_final <- data_final[!has_blank, , drop = FALSE]

data_final
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment