Skip to content

Instantly share code, notes, and snippets.

@EmanuelKuhn
Last active April 16, 2021 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EmanuelKuhn/bc499b90c715907e3c12e2e484fe2945 to your computer and use it in GitHub Desktop.
Save EmanuelKuhn/bc499b90c715907e3c12e2e484fe2945 to your computer and use it in GitHub Desktop.
Obtaining the data

Electricity prices

The electricity prices dataset can be downloaded from: transparency.entsoe.eu.

To use it with our notebook, the yearly csv format should be downloaded for each year. To download the files it is necessary to create an account on transparency.entsoe.eu

Weather data

We obtained the weather data from World Weather Online’s historical weather API. For this an API key is needed. However when we did a project a free trial was available. To gather the data and convert it to the right format, two pyton scripts are included in this gist.

Use on colab

To use the data on Google colab, the obtained data files can be uploaded to a Google drive folder which can be accessed from the notebook.

import os
import string
import time
import requests
from pandas.io import json
import calendar
BASE_URL = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx/"
PARAMETERS = {
"q": "50.641111,4.668056", # Middle of belgium: https://en.wikipedia.org/wiki/Geography_of_Belgium#Extreme_points
"tp": "1", # Weather hourly
"format": "json",
"key": "YOUR_API_KEY"
}
def get_startdate_enddate_for_month(year, month):
month_number_of_days = calendar.monthrange(year, month)[1]
start_date = f"{year}-{format(month, '02d')}-01"
end_date = f"{year}-{format(month, '02d')}-{format(month_number_of_days, '02d')}"
return start_date, end_date
def request_weather(start_date, end_date):
params = PARAMETERS.copy()
params["date"] = start_date
params["enddate"] = end_date
response = requests.get(BASE_URL, params=params)
return response
def request_and_save_weather(start_date, end_date):
response = request_weather(start_date, end_date)
print(f"status: {response.status_code}")
if response.text.lower().find("error") != -1:
print("ERROR:")
print(response.text)
with open(f"responses-raw/error_{start_date}_{end_date}_{response.status_code}.json", 'w') as f:
f.write(response.text)
f.close()
else:
with open(f"responses-raw/weather_{start_date}_{end_date}_{response.status_code}.json", 'w') as f:
f.write(response.text)
f.close()
if __name__ == '__main__':
for year in range(2021, 2022): # Weather until 2020
for month in range(1, 5):
start_date, end_date = get_startdate_enddate_for_month(year, month)
already_exists = os.path.isfile(f"responses-raw/weather_{start_date}_{end_date}_{200}.json")
if not already_exists:
request_and_save_weather(start_date, end_date)
time.sleep(1)
import json
import os
import pandas as pd
from datetime import datetime
from gather_weather_data import get_startdate_enddate_for_month
PATH_PREFIX = "."
COLUMNS = ["tempC", "windspeedKmph", "cloudcover", "uvIndex"]
def parse(response):
month_hourly_DF = pd.DataFrame(columns=COLUMNS)
weather_days = response['data']['weather']
for weather_day in weather_days:
date = weather_day["date"]
# print(date)
date = pd.to_datetime(date, infer_datetime_format=True).date()
weather_hours = weather_day['hourly']
for weather_hour in weather_hours:
while len(weather_hour["time"]) < 4:
weather_hour["time"] = f"0{weather_hour['time']}"
assert len(weather_hour["time"]) == 4, weather_hour["time"]
time = pd.to_datetime(weather_hour["time"], format="%H%M").time()
dt = datetime.combine(date, time)
hour_series = pd.Series(weather_hour, name=dt)
hour_series = hour_series[COLUMNS]
month_hourly_DF = month_hourly_DF.append(hour_series)
month_hourly_DF = month_hourly_DF.sort_index()
return month_hourly_DF
if __name__ == '__main__':
full_hourly_DF = pd.DataFrame(columns=COLUMNS)
for year in range(2021, 2022): # Weather until 2020
for month in range(1, 4):
print(f"year: {year}, month: {month}")
start_date, end_date = get_startdate_enddate_for_month(year, month)
file_path = f"{PATH_PREFIX}/responses-raw/weather_{start_date}_{end_date}_{200}.json"
response_exists = os.path.isfile(file_path)
if not response_exists:
print("QUITTING")
print(f"No response with {start_date}, {end_date} exists")
exit(-1)
response = pd.read_json(file_path)
monthly_df = parse(response)
full_hourly_DF = full_hourly_DF.append(monthly_df)
full_hourly_DF.index.name = "Date"
full_hourly_DF.to_csv(f"Weather_{year}.csv")
print(full_hourly_DF.head())
print(len(full_hourly_DF))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment