Skip to content

Instantly share code, notes, and snippets.

@patperu
Created July 26, 2021 09:27
Show Gist options
  • Save patperu/0be88e7799fb97f0a910dcc93921851f to your computer and use it in GitHub Desktop.
Save patperu/0be88e7799fb97f0a910dcc93921851f to your computer and use it in GitHub Desktop.
title date editor_options
Stickstoffdioxid am Theodor-Heuss-Ring, Kiel (1h-Mittelwerte)
25 7 2021
chunk_output_type
console
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.3     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(usethis)
library(httr)
library(hms)
library(jsonlite)
## 
## Attache Paket: 'jsonlite'
## Das folgende Objekt ist maskiert 'package:purrr':
## 
##     flatten
library(janitor)
## 
## Attache Paket: 'janitor'
## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     chisq.test, fisher.test
library(lubridate)
## 
## Attache Paket: 'lubridate'
## Das folgende Objekt ist maskiert 'package:hms':
## 
##     hms
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     date, intersect, setdiff, union
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
knitr::opts_knit$set(upload.fun = knitr::imgur_upload, base.url = NULL)
knitr::opts_chunk$set(fig.width=unit(15, "cm"), fig.height=unit(11, "cm"))
# https://opendata.schleswig-holstein.de/dataset/stickstoffdioxid-kiel-theodor-heuss-ring-1-stunden-mittelwert-2021

# API-Doku: https://updatedeutschland.org/wp-content/uploads/2021/03/Schnittstellenbeschreibung-Luftdaten_API_2019_09_12.pdf

get_uba_airquality <- function(station, year, component = 5) {
  
  Sys.sleep(5)
  
  usethis::ui_info("Fetching year {ui_value(year)}")
  
  date_from <- paste0(year, "-01-01")
  date_to <- paste0(year, "-12-31")

  out <- httr::VERB(verb = "GET", 
             url = "https://www.umweltbundesamt.de/api/air_data/v2/measures/json", 
             query = list(station = station, # "1579" = Theodor-Heuss-Ring 
                          component = component, # Schadstoffe
                          scope = "2",     # Auswertungen
                          date_from = date_from, 
                          time_from = "1", 
                          date_to = date_to, 
                          time_to = "24", 
                          lang = "de"))
  
  out <- httr::content(out, "text", encoding = "UTF-8")

  jsonlite::fromJSON(out)

}
no2_plot <- function(.data) {

  year <- .data$year[1]

  caption <- "Quelle: Umweltbundesamt mit Daten der Messnetze der Länder und des Bundes\nLetzter Messwert: "
  caption <- paste0(caption, max(.data$date_end))
  
  .data %>% 
  ggplot(aes(date_end, value, group = factor(month), color = factor(month))) + 
  geom_line(alpha = 0.35) + 
  geom_smooth(size = 1.45) + 
  geom_hline(yintercept = 40, colour="#990000", linetype="dashed") +
  scale_y_continuous(limits=c(0, 230)) +
  scale_x_datetime(date_breaks = "1 month", date_minor_breaks = "1 week", date_labels = "%B") + 
  hrbrthemes::theme_ipsum_rc() + 
  labs(title = paste0("Stickstoffdioxid-Belastung am Theodor-Heuss-Ring, Kiel - ", year),
       x = NULL,
       y = expression(paste("[",mu,"g/",m^3, "]")),
       subtitle = expression(paste("Basis: 1h-Mittelwerte, gestrichelte Linie = Grenzwert von 40 ", mu ,"g/",m^3)),
       caption = caption) + 
  theme(legend.position = "none")  

}
prepare_data <- function(.data) {

  station <- as.numeric(.data$request$station)
  
  # urg...
  out <- data.frame(matrix(unlist(.data$data), 
                              ncol = 5, 
                              byrow = TRUE), 
                       stringsAsFactors = FALSE) %>% 
    tibble() %>% 
    set_names(.data$indices$data$`station id`$`date start`) %>% 
    janitor::clean_names() 
  
  out %>% 
    mutate(across(c(component_id, scope_id, value, index), as.numeric), 
           date_end = lubridate::ymd_hms(date_end), 
           year = lubridate::year(date_end), 
           month = lubridate::month(date_end),
           station = station)
}

Get "Stickstoffdioxid am Theodor-Heuss-Ring, Kiel", 1h-Mittelwerte, 2018-2021

no2_list <- map(2018:2021, get_uba_airquality, station = 1579, component = 5)
## ℹ Fetching year 2018
## ℹ Fetching year 2019
## ℹ Fetching year 2020
## ℹ Fetching year 2021

Prepare dataset

no2_df <- map_df(no2_list, prepare_data)

Last five observations

tail(no2_df, 5)
## # A tibble: 5 × 8
##   component_id scope_id value date_end            index  year month station
##          <dbl>    <dbl> <dbl> <dttm>              <dbl> <dbl> <dbl>   <dbl>
## 1            5        2    52 2021-07-26 05:00:00     2  2021     7    1579
## 2            5        2    64 2021-07-26 06:00:00     3  2021     7    1579
## 3            5        2    67 2021-07-26 07:00:00     3  2021     7    1579
## 4            5        2    68 2021-07-26 08:00:00     3  2021     7    1579
## 5            5        2    NA 2021-07-26 09:00:00    NA  2021     7    1579

Number of observations by year and month

no2_df %>% 
  count(station, year, month) %>% 
  pivot_wider(names_from = year, values_from = n)
## # A tibble: 12 × 6
##    station month `2018` `2019` `2020` `2021`
##      <dbl> <dbl>  <int>  <int>  <int>  <int>
##  1    1579     1    743    744    744    744
##  2    1579     2    672    672    696    672
##  3    1579     3    744    744    744    744
##  4    1579     4    720    720    720    720
##  5    1579     5    744    744    744    744
##  6    1579     6    720    720    720    720
##  7    1579     7    744    744    744    610
##  8    1579     8    744    744    744     NA
##  9    1579     9    720    720    720     NA
## 10    1579    10    744    744    744     NA
## 11    1579    11    720    720    720     NA
## 12    1579    12    744    744    744     NA

Mean by year and month

no2_df %>% 
  group_by(station, year, month) %>% 
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop") %>% 
  pivot_wider(names_from = year, values_from = mean_value)
## # A tibble: 12 × 6
##    station month `2018` `2019` `2020` `2021`
##      <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    1579     1   47.7   51.6   33.7   33.7
##  2    1579     2   73.3   50.2   32.2   37.9
##  3    1579     3   71.6   42.8   46.6   37.4
##  4    1579     4   64.2   67.3   40.7   46.6
##  5    1579     5   81.1   49.8   35.3   38.3
##  6    1579     6   64.1   54.1   40.9   49.4
##  7    1579     7   68.7   43.2   25.2   41.6
##  8    1579     8   55.1   51.8   44.3   NA  
##  9    1579     9   51.2   43.5   39.3   NA  
## 10    1579    10   54.3   45.5   19.9   NA  
## 11    1579    11   52.0   47.2   17.6   NA  
## 12    1579    12   41.0   38.5   30.0   NA

Mean by year

no2_df %>%
  group_by(station, year) %>% 
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
## # A tibble: 4 × 3
##   station  year mean_value
##     <dbl> <dbl>      <dbl>
## 1    1579  2018       60.3
## 2    1579  2019       48.8
## 3    1579  2020       33.8
## 4    1579  2021       40.6

Mean last 12 months

no2_df %>%
  filter(between(date_end, max(date_end) %m+% years(-1), max(date_end))) %>%
  group_by(station) %>% 
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
## # A tibble: 1 × 2
##   station mean_value
##     <dbl>      <dbl>
## 1    1579       36.0

Plot 2021

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot of chunk unnamed-chunk-12

Stundenmittelwerte der Jahre 2018 - 2021 nach Monat und Jahr

caption <- "Quelle: Umweltbundesamt mit Daten der Messnetze der Länder und des Bundes\nLetzter Messwert: "
caption <- paste0(caption, max(no2_df$date_end))

no2_df %>%
  mutate(
         month = lubridate::month(date_end, abbr = TRUE, label = TRUE),
         daytime = as.POSIXct(paste("1970-01-01", hms::as_hms(date_end)))
         ) %>% 
  group_by(year, month, daytime) %>% 
  summarise(anzahl = n(), 
            mean_no = mean(value, na.rm = TRUE), .groups = "drop") %>% 
  ggplot(aes(daytime, mean_no, group = factor(year), color = factor(year))) + 
  geom_line() + 
  geom_hline(yintercept = 40, colour="#990000", linetype="dashed") +
  facet_wrap(. ~ month) + 
  scale_x_datetime(expand = c(0, 0),
                   date_breaks = "3 hours", 
                   date_minor_breaks = "3 hours", 
                   date_labels = "%H:%M"
                   ) + 
  hrbrthemes::theme_ipsum_rc() + 
  labs(title = "Stickstoffdioxid-Belastung am Theodor-Heuss-Ring, Kiel",
       x = NULL,
       color = "Jahr",
       y = expression(paste("[",mu,"g/",m^3, "]")),
       subtitle = expression(paste("Basis: 1h-Mittelwerte, gestrichelte Linie = Grenzwert von 40 ", mu ,"g/",m^3)),
       caption = caption) + 
  theme(legend.position = "top")  

plot of chunk unnamed-chunk-13

Save the data

saveRDS(no2_df, "results/no2_df.RDS")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment