Skip to content

Instantly share code, notes, and snippets.

@mfleschutz
Created January 20, 2022 15:25
Show Gist options
  • Save mfleschutz/946524b335c6bdee889c6bb43e91b28c to your computer and use it in GitHub Desktop.
Save mfleschutz/946524b335c6bdee889c6bb43e91b28c to your computer and use it in GitHub Desktop.
Data for gsee via wetterdienst package

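Example usage of `get_data_from_wetterdienst` (`coords` is a `(latitude, longitude)` tuple; `sample_coords` below stands for such a tuple, e.g. `(49.0, 8)`):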

get_data_from_wetterdienst(dataset="temperature_air", year=2021, coords=(49.0, 8))
--> Loading temperature_air data from German Weather Service station: Bergzabern, Bad (Rheinland-Pfalz), distance: 11.9 km.
0        3.2
        ... 
8758    10.5
Name: value, Length: 8760, dtype: float64
get_data_from_wetterdienst(dataset="solar", year=2021, coords=sample_coords)
--> Loading solar data from German Weather Service station: Mannheim (Baden-Württemberg), distance: 83.8 km.
0       0.0
       ... 
8759    0.0
Name: radiation_global, Length: 8760, dtype: float64
from wetterdienst.provider.dwd.observation import DwdObservationRequest
from datetime import datetime, timedelta
from typing import Tuple
import pandas as pd
def get_data_from_wetterdienst(
    dataset: str,
    year: int,
    coords: Tuple,
    verbose: bool = True,
) -> pd.Series:
    """Get solar global radiation or air temperature via wetterdienst package.

    Hourly observations for ``year`` are requested from the single station
    that ranks closest to ``coords``.

    Args:
        dataset: Either ``'solar'`` or ``'temperature_air'``.
        year: Calendar year to load. The requested window runs from
            Jan 1st 00:00 to Dec 31st 23:00, shifted back by one hour
            (UTC+1 offset for Germany).
        coords: Position unpacked into ``filter_by_rank`` ahead of the rank
            argument -- presumably ``(latitude, longitude)``; verify against
            the wetterdienst documentation.
        verbose: If True, print name, state and distance of the station
            the data is loaded from.

    Returns:
        For ``'solar'`` the ``radiation_global`` column, for
        ``'temperature_air'`` the ``value`` column of the rows whose
        ``parameter`` equals ``'temperature_air_mean_200'``.

    Raises:
        RuntimeError: If ``dataset`` is not one of the supported names.
    """
    _delta_to_UTC = 1  # 1 for Germany for UTC+1
    _si_units = dict(temperature_air=False, solar=False)
    # Temperature is requested in tidy (long) form and filtered by parameter
    # below; solar is requested non-tidy and read from a named column.
    _tidy = dict(temperature_air=True, solar=False)

    # Validate up front: otherwise the dict lookups below fail with a bare
    # KeyError and the intended error message is never shown.
    if dataset not in _tidy:
        raise RuntimeError("`dataset` must be either 'solar' or 'temperature_air'!")

    utc_shift = timedelta(hours=-_delta_to_UTC)
    stations = DwdObservationRequest(
        parameter=dataset,
        resolution="hourly",
        start_date=datetime(year, 1, 1, 0, 0) + utc_shift,
        end_date=datetime(year, 12, 31, 23, 0) + utc_shift,
        si_units=_si_units[dataset],
        tidy=_tidy[dataset],
    ).filter_by_rank(*coords, 1)  # rank 1: keep only the top-ranked station

    if verbose:
        provider_info = stations.provider.value[1]
        s = stations.df.loc[0, :]
        info = (
            f"--> Loading {dataset} data from {provider_info} station: "
            f"{s['name']} ({s['state']}), distance: {s['distance']:.1f} km."
        )
        print(info)

    # Only one station was selected, so the first result holds all the data.
    df = next(stations.values.query()).df

    if dataset == "solar":
        return df.radiation_global
    # `dataset` was validated above, so this is the 'temperature_air' case.
    return df[df.parameter == "temperature_air_mean_200"].value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment