Last active
August 21, 2016 14:48
-
-
Save chringel21/2bb1345c9a07d1f5ac64c56f1886d475 to your computer and use it in GitHub Desktop.
Fetch data from the Berlin city measuring network BLUME for air quality using R. For the purpose of spatial plotting, the function adds geographical coordinates to the measuring stations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Christian Kuntzsch
## christian.kuntzsch.me
## github.com/DeEgge
## 2016-08-21
# Example call: fetches the daily values for 2010-07-20 through 2016-07-20
# (one HTTP request per day).
# It is wrapped in `if (FALSE)` so that sourcing this file only defines
# getBlume(); run the call by hand afterwards. Evaluated in file order it would
# fail with "could not find function", because getBlume() is defined further
# down in this file.
if (FALSE) {
  blume <- getBlume(start = "2010-07-20", end = "2016-07-20")
}
# Read one numeric token from the text of an HTML node.
#
# Selects nodes with `xpath`, takes the text of the `node`-th match, splits it
# on single spaces and converts the `token`-th piece with as.numeric().
# Returns NA when the node or the token does not exist.
.blume_scalar <- function(page, xpath, node, token) {
  text <- rvest::html_text(rvest::html_nodes(page, xpath = xpath))[node]
  as.numeric(strsplit(text, " ")[[1]][token])
}

# Turn one downloaded daily page into a data frame with one row per station.
#
# page: parsed HTML document as returned by read_html().
# day:  the Date the page belongs to; stored in the `Date` column.
# Signals an error when the page does not have the expected table layout.
.blume_parse_day <- function(page, day) {
  # The measurements are read from the third <table> of the page.
  x <- rvest::html_table(rvest::html_nodes(page, "table")[[3]], header = TRUE)
  # html_table() may return a tibble; a plain data frame keeps the positional
  # subsetting below predictable.
  x <- as.data.frame(x, stringsAsFactors = FALSE)

  # Drop rows 1, 12 and 19 and every second column from 3 to 15.
  x <- x[-c(1, 12, 19), -c(3, 5, 7, 9, 11, 13, 15)]
  if (ncol(x) != 8L) {
    stop("unexpected table layout (", ncol(x), " columns instead of 8)",
         call. = FALSE)
  }

  # "---" marks a missing measurement.
  x[x == "---"] <- NA
  x[2:8] <- lapply(x[2:8], as.numeric)
  # The station label starts at the fifth character of the first column.
  x[[1]] <- factor(substring(as.character(x[[1]]), 5L))

  # Weather readings are single values per page and are repeated on each row.
  x[["temperature"]] <- .blume_scalar(page, "//b[3]", node = 2L, token = 1L)
  x[["wind"]] <- .blume_scalar(page, "//b[2]", node = 2L, token = 2L)
  x[["wind_direction"]] <-
    .blume_scalar(page, "//p[2]//b[1]", node = 1L, token = 1L)
  x[["humidity"]] <- .blume_scalar(page, "//b[4]", node = 1L, token = 1L)
  x[["date"]] <- day

  names(x) <- c("Station",
                "Particles PM10",
                "Soot",
                "Nitrogen dioxide",
                "Benzol",
                "Carbon monoxide",
                "Ozone",
                "Sulphur dioxide",
                "Temperature",
                "Wind velocity",
                "Wind direction",
                "Relative humidity",
                "Date")
  x
}

# Fetch daily air quality values from the Berlin BLUME measuring network.
#
# Downloads one HTML page per day between `start` and `end` (both inclusive),
# extracts the per-station measurements plus temperature (°C), wind velocity
# (m/s), wind direction (°) and relative humidity (%), and adds `lat`/`lng`
# coordinates for the measuring stations.
#
# Arguments:
#   start, end  Single dates, either "YYYY-MM-DD" strings or Date objects.
#
# Returns:
#   A data frame with one row per station and day. Column names are
#   syntactically valid (e.g. `Particles.PM10`, `Wind.velocity`). Days whose
#   page cannot be downloaded or parsed are left out. If no day yields data,
#   an empty data frame is returned with a warning.
#
# Requires the rvest package.
getBlume <- function(start, end) {
  start <- as.Date(start, format = "%Y-%m-%d")
  end <- as.Date(end, format = "%Y-%m-%d")
  if (length(start) != 1L || length(end) != 1L || is.na(start) || is.na(end)) {
    stop("`start` and `end` must each be a single date in YYYY-MM-DD format",
         call. = FALSE)
  }
  if (start > end) {
    stop("`start` must not be after `end`", call. = FALSE)
  }
  if (!requireNamespace("rvest", quietly = TRUE)) {
    stop("Package 'rvest' is required for getBlume()", call. = FALSE)
  }

  base_url <- paste0("http://www.stadtentwicklung.berlin.de/umwelt/",
                     "luftqualitaet/de/messnetz/tageswerte/download/")
  days <- seq(start, end, by = "day")

  # Collect the daily tables in a list and bind them once at the end, instead
  # of growing a data frame inside the loop.
  daily <- vector("list", length(days))
  for (i in seq_along(days)) {
    day <- days[i]
    # Short pause between requests.
    Sys.sleep(0.1)
    print(paste("Fetching data for", day, "...", sep = " "))

    url <- paste0(base_url, format(day, "%Y%m%d"), ".html")
    page <- tryCatch(rvest::read_html(url), error = function(e) NULL)
    if (is.null(page)) {
      message("  download failed for ", format(day), "; skipping")
      next
    }

    # A page with an unexpected layout must not abort a long-running fetch.
    parsed <- tryCatch(
      .blume_parse_day(page, day),
      error = function(e) {
        warning("Skipping ", format(day), ": ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )
    # Assigning NULL via [[<- would delete the list slot, so guard it.
    if (!is.null(parsed)) {
      daily[[i]] <- parsed
    }
  }

  daily <- Filter(Negate(is.null), daily)
  if (length(daily) == 0L) {
    warning("No data could be fetched between ", format(start), " and ",
            format(end), call. = FALSE)
    return(data.frame())
  }
  result <- do.call(rbind, daily)
  rownames(result) <- NULL

  # Coordinates are assigned by position: the k-th entry belongs to the station
  # found in the k-th row of the combined table.
  # NOTE(review): this assumes the first fetched day lists all 16 stations in
  # the order of these vectors -- confirm against the source page.
  lat <- c(52.542383, 52.485814, 52.398406, 52.473192, 52.489439, 52.643525,
           52.447697, 52.653269, 52.513606, 52.485261, 52.5066, 52.463611,
           52.438056, 52.467511, 52.514072, 52.481669)
  lng <- c(13.3493, 13.348775, 13.368103, 13.225144, 13.430856, 13.489531,
           13.64705, 13.296081, 13.418833, 13.529503, 13.332972, 13.31825,
           13.3875, 13.44165, 13.469931, 13.433967)
  stations <- as.character(utils::head(result$Station, length(lat)))
  idx <- match(as.character(result$Station), stations)
  # A missing station label must never pick up coordinates.
  idx[is.na(result$Station)] <- NA_integer_
  result$lat <- lat[idx]
  result$lng <- lng[idx]

  # Keep the syntactically valid column names callers already receive
  # (spaces become dots).
  names(result) <- make.names(names(result), unique = TRUE)
  result
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment