Skip to content

Instantly share code, notes, and snippets.

@chringel21
Last active August 21, 2016 14:48
Show Gist options
  • Save chringel21/2bb1345c9a07d1f5ac64c56f1886d475 to your computer and use it in GitHub Desktop.
Save chringel21/2bb1345c9a07d1f5ac64c56f1886d475 to your computer and use it in GitHub Desktop.
Fetch data from the Berlin city measuring network BLUME for air quality using R. For the purpose of spatial plotting, the function adds geographical coordinates to the measuring stations.
## Christian Kuntzsch
## christian.kuntzsch.me
## github.com/DeEgge
## 2016-08-21
# Example call: fetch the daily readings for 2010-07-20 through 2016-07-20
# and keep the resulting data frame in `blume`.
# NOTE(review): this call appears above the definition of getBlume(); when the
# file is run top to bottom in a fresh session the function does not exist yet
# at this point, so define getBlume() first (or move this line below it).
blume <- getBlume(start = "2010-07-20", end = "2016-07-20")
#' Fetch daily air-quality readings from the Berlin BLUME measuring network
#'
#' Downloads one HTML report per day in the requested range, extracts the
#' per-station pollutant table plus the day's weather figures, and appends
#' geographic coordinates for the measuring stations.
#'
#' @param start First day to fetch, as "YYYY-MM-DD" (or a `Date`).
#' @param end Last day to fetch (inclusive), as "YYYY-MM-DD" (or a `Date`).
#' @return A data.frame with one row per station and day and the columns
#'   `Station`, `Particles.PM10`, `Soot`, `Nitrogen.dioxide`, `Benzol`,
#'   `Carbon.monoxide`, `Ozone`, `Sulphur.dioxide`, `Temperature` (°C),
#'   `Wind.velocity` (m/s), `Wind.direction` (°), `Relative.humidity` (%),
#'   `Date`, `lat` and `lng`.
#' @details Days whose report cannot be downloaded are skipped; a single
#'   warning reports how many were skipped. An error is raised when the date
#'   range is invalid, when 'rvest' is not installed, or when no report at
#'   all could be downloaded.
getBlume <- function(start, end) {
  # Validate the requested range before touching the network.
  start <- as.Date(start, format = "%Y-%m-%d")
  end <- as.Date(end, format = "%Y-%m-%d")
  if (length(start) != 1L || length(end) != 1L || is.na(start) || is.na(end)) {
    stop("`start` and `end` must each be a single date in YYYY-MM-DD format",
         call. = FALSE)
  }
  if (start > end) {
    stop("`start` must not be after `end`", call. = FALSE)
  }
  # Fail loudly when the scraping package is missing (require() would only
  # return FALSE and let the function stumble on later).
  if (!requireNamespace("rvest", quietly = TRUE)) {
    stop("Package 'rvest' is required but is not installed", call. = FALSE)
  }

  base_url <- "http://www.stadtentwicklung.berlin.de/umwelt/luftqualitaet/de/messnetz/tageswerte/download/"
  col_names <- c("Station", "Particles PM10", "Soot", "Nitrogen dioxide",
                 "Benzol", "Carbon monoxide", "Ozone", "Sulphur dioxide",
                 "Temperature", "Wind velocity", "Wind direction",
                 "Relative humidity", "Date")

  # Take the `node`-th element matched by `xpath`, split its text on spaces
  # and return the `token`-th piece as a number (NA when it is missing).
  first_number <- function(page, xpath, node, token) {
    txt <- rvest::html_text(rvest::html_nodes(page, xpath = xpath))[node]
    as.numeric(strsplit(txt, " ")[[1]][token])
  }

  dates <- seq(start, end, by = "day")
  stamps <- format(dates, "%Y%m%d")

  # One slot per day; slots of days that could not be fetched stay NULL.
  daily <- vector("list", length(dates))

  for (i in seq_along(dates)) {
    # Short pause between requests, then log progress to the console.
    Sys.sleep(0.1)
    print(paste("Fetching data for", dates[i], "...", sep = " "))

    url <- paste0(base_url, stamps[i], ".html")
    page <- tryCatch(rvest::read_html(url), error = function(e) NULL)
    if (is.null(page)) {
      next
    }

    # The readings live in the third <table> of the page. Newer rvest
    # versions return a tibble, so normalise to a plain data.frame first.
    x <- rvest::html_table(rvest::html_nodes(page, "table")[[3]], header = TRUE)
    x <- as.data.frame(x)
    # Drop rows 1, 12 and 19 and every second column from 3 to 15, keeping
    # the station column and seven measurement columns.
    # NOTE(review): these positions are tied to the page layout -- confirm
    # against a current report if the site has changed.
    x <- x[-c(1, 12, 19), -seq(3, 15, by = 2), drop = FALSE]

    # "---" marks a missing reading.
    x[] <- lapply(x, function(col) {
      col[col %in% "---"] <- NA
      col
    })
    x[2:8] <- lapply(x[2:8], function(col) as.numeric(as.character(col)))
    # Strip the first four characters of the station label.
    x[[1]] <- as.factor(substring(x[[1]], 5))

    # Day-level weather figures, repeated on every station row.
    x$temperature <- first_number(page, "//b[3]", node = 2, token = 1)
    x$wind_velocity <- first_number(page, "//b[2]", node = 2, token = 2)
    x$wind_direction <- first_number(page, "//p[2]//b[1]", node = 1, token = 1)
    x$humidity <- first_number(page, "//b[4]", node = 1, token = 1)
    x$Date <- dates[i]

    if (ncol(x) != length(col_names)) {
      stop(sprintf("Unexpected table layout in the report for %s: %d columns instead of %d",
                   format(dates[i]), ncol(x), length(col_names)),
           call. = FALSE)
    }
    names(x) <- col_names
    daily[[i]] <- x
  }

  fetched <- !vapply(daily, is.null, logical(1))
  if (!any(fetched)) {
    stop(sprintf("No daily report could be downloaded between %s and %s",
                 format(start), format(end)),
         call. = FALSE)
  }
  if (!all(fetched)) {
    warning(sprintf("Skipped %d of %d day(s) whose report could not be downloaded",
                    sum(!fetched), length(fetched)),
            call. = FALSE)
  }

  # Bind all days in one go instead of growing the table inside the loop.
  readings <- do.call(rbind, daily[fetched])

  # Station coordinates, in the order in which the stations appear in the
  # first fetched report.
  # NOTE(review): assumes that first report lists all 16 stations in the
  # order these coordinates were written for -- confirm.
  station_lat <- c(52.542383, 52.485814, 52.398406, 52.473192,
                   52.489439, 52.643525, 52.447697, 52.653269,
                   52.513606, 52.485261, 52.5066, 52.463611,
                   52.438056, 52.467511, 52.514072, 52.481669)
  station_lng <- c(13.3493, 13.348775, 13.368103, 13.225144,
                   13.430856, 13.489531, 13.64705, 13.296081,
                   13.418833, 13.529503, 13.332972, 13.31825,
                   13.3875, 13.44165, 13.469931, 13.433967)
  reference <- as.character(readings$Station[seq_along(station_lat)])
  # Rows whose station is NA or not among the reference stations get NA.
  position <- match(as.character(readings$Station), reference,
                    incomparables = NA)
  readings$lat <- station_lat[position]
  readings$lng <- station_lng[position]

  # Keep the output shape callers already rely on: syntactic column names
  # (spaces become dots) and plain 1..n row names.
  names(readings) <- make.names(names(readings), unique = TRUE)
  rownames(readings) <- NULL
  readings
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment