Created
August 9, 2023 06:43
-
-
Save gerritjandebruin/204dcd7de38997507844c4ba35e1126c to your computer and use it in GitHub Desktop.
Import Landelijk Meetnetwerk Lucht (LML) data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import requests | |
import pandas as pd | |
from tqdm import tqdm | |
def get_locations(
    base_url: str = "https://api.luchtmeetnet.nl/open_api/stations",
    *,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Get all locations from the LML API.

    Requests the stations endpoint page by page, starting at page 1, and
    collects the ``"data"`` list of every page until the ``"pagination"``
    section of a response no longer points to a further page.

    Parameters:
    -----------
    base_url : str
        The base URL of the LML API.
    timeout : float
        Number of seconds to wait for each HTTP request. Without a timeout
        ``requests`` waits indefinitely on a stalled connection.

    Returns:
    --------
    pd.DataFrame
        A DataFrame built from all collected ``"data"`` entries.

    Raises:
    -------
    Exception
        If a page is answered with a status code other than 200.
    requests.exceptions.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    records = []
    page = 1
    while True:
        # Let requests build the query string so that a base_url which already
        # carries query parameters is extended instead of corrupted.
        response = requests.get(base_url, params={"page": page}, timeout=timeout)
        if response.status_code != 200:
            raise Exception(f"Failed to get data for page {page}, got status code {response.status_code}")

        payload = response.json()
        records.extend(payload["data"])

        # Stop when there is no last page, when it has been reached, or when
        # the API does not name a next page.
        pagination = payload["pagination"]
        last_page = pagination["last_page"]
        if not last_page or page == last_page:
            break
        next_page = pagination["next_page"]
        if not next_page:
            break
        page = next_page

    return pd.DataFrame(records)
def get_location_info(
    station_number: str,
    base_url: str = "https://api.luchtmeetnet.nl/open_api/stations/",
    *,
    timeout: float = 30.0,
) -> dict:
    """Get location information from the LML API.

    Parameters:
    -----------
    station_number : str
        The station number of the location: ``"NL"`` followed by exactly five
        ASCII digits, e.g. ``"NL12345"``.
    base_url : str
        The base URL of the LML API. The station number is appended to it
        as-is, so it has to end with a slash.
    timeout : float
        Number of seconds to wait for the HTTP request. Without a timeout
        ``requests`` waits indefinitely on a stalled connection.

    Returns:
    --------
    dict
        The ``"data"`` member of the JSON response.

    Raises:
    -------
    ValueError
        If ``station_number`` does not have the form described above.
    Exception
        If the API answers with a status code other than 200.
    requests.exceptions.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    suffix = station_number[2:]
    # str.isnumeric()/isdigit() alone also accept characters such as "½", "²"
    # or full-width digits; requiring ASCII restricts the suffix to 0-9.
    is_valid = (
        len(station_number) == 7
        and station_number.startswith("NL")
        and suffix.isascii()
        and suffix.isdigit()
    )
    if not is_valid:
        raise ValueError(f"Invalid station number {station_number}, should be of the form NL<digit><digit><digit><digit><digit>.")

    response = requests.get(f"{base_url}{station_number}", timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to get data for station {station_number}, got status code {response.status_code}")

    return response.json()["data"]
def get_component_info(
    component: None | str = None,
    base_url: str = "https://api.luchtmeetnet.nl/open_api/components/",
    *,
    timeout: float = 30.0,
) -> dict:
    """Get component information from the LML API.

    Parameters:
    -----------
    component : str | None
        The component. If None (or an empty string) is provided, the bare
        ``base_url`` is requested, i.e. all components are asked for.
    base_url : str
        The base URL of the LML API. The component is appended to it as-is,
        so it has to end with a slash.
    timeout : float
        Number of seconds to wait for the HTTP request. Without a timeout
        ``requests`` waits indefinitely on a stalled connection.

    Returns:
    --------
    dict
        The ``"data"`` member of the JSON response.
        NOTE(review): when all components are requested the payload is
        presumably a collection rather than a single mapping - confirm
        against the API before relying on the ``dict`` annotation.

    Raises:
    -------
    Exception
        If the API answers with a status code other than 200.
    requests.exceptions.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    # A falsy component (None or "") selects the collection endpoint.
    url = f"{base_url}{component}" if component else base_url

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to get data for component {component}, got status code {response.status_code}")

    return response.json()["data"]
def get_measurements(
    component: str,
    location_code: str,
    stop_date: None | pd.Timestamp = None,
    base_url: str = "https://api.luchtmeetnet.nl/open_api/stations/",
    *,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Get measurements from the LML API.

    Downloads the measurements of one station page by page, starting at page
    1. The download is best effort: when a request fails or a response cannot
    be processed, a message is printed and whatever was collected so far is
    returned.

    Parameters:
    -----------
    component : str
        The component. It is sent as the ``formula`` query parameter.
    location_code : str
        The location code.
    stop_date : pd.Timestamp | None
        The stop date. Paging ends after the first page whose last entry was
        measured before this date; that page is kept in full (rows are not
        trimmed). If None provided, all measurements will be returned.
        NOTE(review): this assumes the API returns the newest measurements
        first - confirm. Also, if the API timestamps are timezone-aware and
        ``stop_date`` is naive (or the other way round), the comparison
        raises, which ends the download after the first page - confirm the
        timestamp format.
    base_url : str
        The base URL of the LML API.
    timeout : float
        Number of seconds to wait for each HTTP request. Without a timeout
        ``requests`` waits indefinitely on a stalled connection.

    Returns:
    --------
    pd.DataFrame
        The collected measurements with ``timestamp_measured`` converted by
        ``pd.to_datetime``. If nothing could be downloaded, an empty frame
        that only has the ``timestamp_measured`` column.
    """
    records = []
    page = 1
    url = f"{base_url}{location_code}/measurements"

    # The context manager closes the progress bar on every exit path,
    # including KeyboardInterrupt during a long download.
    with tqdm() as pbar:
        while True:
            try:
                # params= makes requests URL-encode the values.
                response = requests.get(
                    url,
                    params={"page": page, "formula": component},
                    timeout=timeout,
                )
                if response.status_code != 200:
                    print(f"Failed to get data for page {page}")
                    break

                payload = response.json()
                rows = payload["data"]
                records.extend(rows)
                pbar.update()

                # An empty page has no last row to compare against, so the
                # stop check is skipped and the pagination info decides.
                if stop_date is not None and rows:
                    if pd.to_datetime(rows[-1]["timestamp_measured"]) < stop_date:
                        break

                # Stop when there is no last page, when it has been reached,
                # or when the API does not name a next page.
                pagination = payload["pagination"]
                last_page = pagination["last_page"]
                if not last_page or page == last_page:
                    break
                next_page = pagination["next_page"]
                if not next_page:
                    break
                page = next_page
            except Exception as e:
                # Deliberately broad: keep the pages collected so far instead
                # of losing a long download to one bad response.
                print(e)
                break

    if not records:
        # Without any rows the frame would have no "timestamp_measured"
        # column and the conversion below would raise a KeyError.
        return pd.DataFrame({"timestamp_measured": pd.to_datetime([])})

    return pd.DataFrame(records).assign(
        timestamp_measured=lambda frame: pd.to_datetime(frame["timestamp_measured"])
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment