Skip to content

Instantly share code, notes, and snippets.

@alex-hhh
Last active September 19, 2023 12:14
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alex-hhh/e1a4ffe3e861ad3fd31f6655a3a66b26 to your computer and use it in GitHub Desktop.
Save alex-hhh/e1a4ffe3e861ad3fd31f6655a3a66b26 to your computer and use it in GitHub Desktop.
"""Update weather data for sessions in an ActivityLog2 database using data
from the Meteostat service. The script will find sessions that have
latitude/longitude coordinates but no weather data, and will attempt to fetch
weather info from the Meteostat service and store it attached to the each
session. Sessions that already have weather data will not be updated.
For ActivityLog2, see https://github.com/alex-hhh/ActivityLog2
For Meteostat, see https://meteostat.net/en/
To use it, you will need to install Python 3, then install the meteostat
package using "python3 -m pip install meteostat". You will also need to
update this script to point to your AL2 database, see the "USER CONFIGURATION
OPTIONS" section at the end of this script.
ActivityLog2 stores all data in a SQLite3 database and this script will
accesss the database file directly to fetch information about sessions and add
weather records to them. You can find the database schema file at:
https://github.com/alex-hhh/ActivityLog2/blob/master/sql/db-schema.sql
"""
from datetime import datetime, timedelta, timezone;
import meteostat as mto; # python3 -m pip install meteostat
import os
import pandas as pd;
import sqlite3;
import sys;
import time;
LOG_ERROR = 0;
LOG_WARNING = 1;
LOG_INFO = 2;
LOG_DETAIL = 3;
LOG_DEBUG = 4;
def log_level_2_name(level):
"""Convert a log level code into a name to be used for display by the
`log` function below
"""
if level == LOG_ERROR:
return "ERROR";
elif level == LOG_WARNING:
return "WARNING";
elif level == LOG_INFO:
return "INFO";
elif level == LOG_DETAIL:
return "DETAIL";
elif level == LOG_DEBUG:
return "DEBUG";
else:
return "UNKNOWN LEVEL";
def log(level, msg):
"""Log a message at the specified log level.
There is some control over what messages are logged and where the logs go
(to the console and/or to the file). See settings of the log function
below.
"""
# The log function puts a timestamp as the number of seconds since the
# program start -- this is more useful for this type of program than a
# timestamp.
if level > log.level:
return; # skip logging these messages
lvl = log_level_2_name(level);
duration = (datetime.now() - log.start_time).total_seconds();
msg = f"[{duration:>8.1f}] {lvl} {msg}";
if log.to_stdout:
print(msg);
if log.file_output != None:
log.file_output.write(msg);
log.file_output.write('\n');
log.level = LOG_INFO; # drop log messages with lower priority than this one
log.to_stdout = True; # Whether to log to stdout
log.file_output = None; # File to log output to (initialized later)
log.start_time = datetime.now(); # Tell the log function when the program started
def fetch_sessions_without_weather_data(dbc):
"""Return the sessions with no weather data from the database cursor DBC.
We look for sessions which have a latitude/longitude point (cannot fetch
weather if we don't know the location), but no record SESSION_WEATHER.
Returns a list of sessions, for each row we have "session id", "timestamp"
(when the session started) and latitude/longitude location.
"""
dbc.execute('''
select P.session_id, min(T.timestamp), T.position_lat, T.position_long
from A_TRACKPOINT T,
A_LENGTH L,
A_LAP P
where L.lap_id = P.id
and T.length_id = L.id
and T.position_lat is not null
and T.position_long is not null
and P.session_id not in (select session_id from SESSION_WEATHER)
group by P.session_id
order by P.session_id desc
''');
return dbc.fetchall();
## SQL query to insert a weather observation record. Used by
## `put_session_weather`
insert_session_weather_sql = '''
insert into SESSION_WEATHER(
session_id,
wstation,
weather_status_id,
timestamp,
temperature,
dew_point,
humidity,
wind_speed,
wind_gusts,
wind_direction,
pressure,
position_lat,
position_long)
values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''';
def put_session_weather(dbc, session_id, wobs):
"""Store in the database cursor DBC the weather observation data in WOBS
for the sesion identified by SESSION_ID.
WOBS is a hash table containing weather parameters for a session, such as
temperature, humidity, etc.
WARNING: you will need to commit the database transaction for the changes
to take effect. This function does not commit the transaction.
"""
timestamp = wobs['timestamp']; # must exist
wstation = wobs['wstation']; # must exist
# All other weather data is optional and OK not to exist
weather_status_id = wobs['weather_status_id'] if 'weather_status_id' in wobs else None;
temperature = wobs['temperature'] if 'temperature' in wobs else None;
dew_point = wobs['dew_point'] if 'dew_point' in wobs else None;
humidity = wobs['humidity'] if 'humidity' in wobs else None;
wind_speed = wobs['wind_speed'] if 'wind_speed' in wobs else None;
wind_direction = wobs['wind_direction'] if 'wind_direction' in wobs else None;
wind_gusts = wobs['wind_gusts'] if 'wind_gusts' in wobs else None;
pressure = wobs['pressure'] if 'pressure' in wobs else None;
position_lat = wobs['position_lat'] if 'position_lat' in wobs else None;
position_long = wobs['position_long'] if 'position_long' in wobs else None;
dbc.execute(
insert_session_weather_sql,
(session_id,
wstation,
weather_status_id,
timestamp,
temperature,
dew_point,
humidity,
wind_speed,
wind_gusts,
wind_direction,
pressure,
position_lat,
position_long));
def nearby_stations(lat, lon, count = 10):
"""Return the list of nearby stations around the position LAT/LON. At
most COUNT stations are returned in a Pandas data frame.
"""
return mto.Stations().nearby(lat, lon).fetch(count);
def hourly_observations(station_id, t, hours_around = 5):
"""Return a Pandas data frame for hourly observations for the station
STATION_ID. Observations are returned around the timestamp T, a datetime
object.
"""
delta = timedelta(hours = hours_around);
return mto.Hourly(station_id, t - delta, t + delta).fetch();
# Map meteostat weather codes to AL2 weather codes. See bottom of this file
# for the weather code mappings.
coco_to_al2codes = {
1 : 0, 2 : 1, 3 : 22, 4 : 2, 5 : 8, 6 : 8, 7 : 16 ,
8 : 3, 9 : 17, 10 : 3, 11 : 17, 12 : 20, 13 : 21,
14 : 18, 15 : 4, 16 : 19, 17 : 13, 18 : 13, 19 : 13,
20 : 21, 21 : 4, 22 : 19, 23 : 14, 24 : 12, 25 : 6,
26 : 6, 27 : 6};
def coco2al2(code):
"""Return the AL2 weather code (weather_status_id) that corresponds to the
Meteostat weather CODE. Return None if the code cannot be mapped.
"""
try:
return coco_to_al2codes[int(code)];
except:
return None;
def extract_observations(timestamp, observations):
"""Extract the closest weather data to TIMESTAMP (a unix timestamp) from
OBSERVATIONS, a Pandas data frame containing observations indexed by a
timestamp. Returns a dictionary of weather values to match an AL2 weather
record.
"""
# NOTE: Meteostat deals with timestamps in UTC format, but also uses
# "naive" date time objects. So we need to convert our timestamp into a
# UTC date time, than drop the timezone info. Yes, this is somewhat dumb.
ts = datetime.fromtimestamp(timestamp, tz = timezone.utc).replace(tzinfo = None);
obs_low = observations[observations.index < ts];
obs_high = observations[observations.index > ts];
timestamp_low = time.mktime(obs_low.index[-1].timetuple());
timestamp_high = time.mktime(obs_high.index[0].timetuple());
obs = obs_high;
obs_idx = 0;
if (timestamp - timestamp_low) < (timestamp_high - timestamp):
obs = obs_low;
obs_idx = -1;
# Meteostat wind speeds and gusts are in km/h, but Al2 stores them in
# meters/seconds, so we convert them here.
wspd = obs['wspd'][obs_idx];
if wspd != None and not pd.isnull(wspd):
wspd = wspd / 3.6;
wpgt = obs['wpgt'][obs_idx];
if wpgt != None and not pd.isnull(wpgt):
wpgt = wpgt / 3.6;
wobs = {
'timestamp': int(time.mktime(obs.index[-1].timetuple())),
'temperature' : obs['temp'][obs_idx],
'dew_point' : obs['dwpt'][obs_idx],
'humidity': obs['rhum'][obs_idx],
'wind_speed' : wspd,
'wind_direction' : obs['wdir'][obs_idx],
'wind_gusts' : wpgt,
'pressure' : obs['pres'][obs_idx],
'weather_status_id' : coco2al2(obs['coco'][obs_idx])
};
return wobs;
# Columns we want to print out for weather stations in
# `get_weather_observations`
print_columns = [
'name', 'country', 'region','wmo', 'icao', 'latitude',
'longitude', 'elevation', 'timezone', 'distance'];
def get_weather_observations(session_id, timestamp, lat, lon, max_station_distance):
"""Return weather observations for the session SESSION_ID at TIMESTAMP.
The function will look for a weather station at most MAX_STATION_DISTANCE
(in meters) from the LAT/LON coordinate, assumed to be the start location
for the session identified by SESSION_ID.
If such a weather station is found, it will try to fetch observations
around TIMESTAMP, a UNIX timestamp, assumed to be the start time of the
session identified by SESSION_ID.
Returns a hash table with weather observations or None, if no such
observations are found.
"""
# NOTE: Meteostat deals with timestamps in UTC format, but also uses
# "naive" date time objects. So we need to convert our timestamp into a
# UTC date time, than drop the timezone info. Yes, this is somewhat dumb.
t = datetime.fromtimestamp(timestamp, tz = timezone.utc).replace(tzinfo = None);
log(LOG_DETAIL, f"Session {session_id} start location lat = {lat}, lon = {lon}");
log(LOG_DETAIL, f"Will search weather data around {t} ({timestamp})");
nearby = nearby_stations(lat, lon)
if len(nearby.index) == 0:
log(LOG_WARNING, f"No nearby weather stations found for session {session_id}");
return None;
log(LOG_DEBUG, "Candidate weather stations:");
log(LOG_DEBUG, nearby[print_columns]);
station_index = 0;
max_station_index = len(nearby.index);
while (station_index < max_station_index):
station_id = nearby.index[station_index];
# NOTE: we could limit fetching data only from airport weather
# stations by looking for "not pd.isnull(nearby.icao[station_index])"
if nearby.distance[station_index] > max_station_distance:
log(LOG_WARNING, f"Station {station_id} ({nearby.name[station_index]}) is to far away, won't use it");
station_index = station_index + 1;
continue;
log(LOG_DETAIL, f"Trying station {station_id}, {nearby.name[station_index]}");
observations = hourly_observations(station_id, t);
if len(observations.index) == 0:
log(LOG_WARNING, f"Station {station_id} ({nearby.name[station_index]}) has no observations around {t} ({timestamp})");
station_index = station_index + 1;
continue;
# Prepare "base" weather observations record with weather station
# info.
wobs = {
'wstation' : f"meteostat {station_id}",
'position_lat' : nearby['latitude'][0],
'position_long' : nearby['longitude'][0]
};
log(LOG_DETAIL, f"Station {station_id} ({nearby.name[station_index]}) has observations around {t} ({timestamp})");
log(LOG_DEBUG, observations);
# Get the observation closest to our timestamp (from the past)
weather_data = extract_observations(timestamp, observations);
# ... and add them to base
wobs.update(weather_data);
# ... remove None and Nan items from the weather observations.
clean_wobs = [(k, v) for (k, v) in wobs.items()
if v != None and not pd.isnull(v)];
return dict(clean_wobs); # return when the first observations were found
log(LOG_WARNING, f"Could not find weather data for session {session_id}");
return None;
def update_weather_for_sessions(dbc, activity_max_age, max_station_distance):
"""Update weather data for sessions in the databse DBC (a database cursor)
that don't already have weathre data. Only sessions whose start time is
less than ACTIVITY_MAX_AGE (in seconds) are considered.
MAX_STATION_DISTANCE defines the max distance in meters for a weather from
a session start location and it is passed to `get_weather_observations`
NOTE: weather data records are added to the database, but the database
transaction is not commited. To make changes permanent, the database
transaction needs to be commited using `connection.commit()`.
"""
candidates = fetch_sessions_without_weather_data(dbc);
cutoff = time.time() - activity_max_age;
for (session_id, timestamp, lat, lon) in candidates:
if timestamp < cutoff:
t = datetime.fromtimestamp(timestamp);
log(LOG_DETAIL, f"Skipping old session {session_id} from {t} ({timestamp})");
continue;
wobs = get_weather_observations(session_id, timestamp, lat, lon, max_station_distance);
if wobs != None:
log(LOG_INFO, f"Storing weather data for session {session_id}");
put_session_weather(dbc, session_id, wobs);
if __name__ == "__main__":
## USER CONFIGURATION OPTIONS
log.level = LOG_INFO; # logging level, can also use LOG_DETAIL or LOG_DEBUG
max_station_distance = 50 * 1000; # 50 km
activity_max_age = 365 * 24 * 60 * 60; # 1 year
db_file = "ActivityLog.db"; # database file
## NOTE that using sqlite3.connect() on a non-existent file will create a
## new database, but with no tables in it, creating a lot of confusing
## errors, so better check here if the file actualy exists.
if not os.path.exists(db_file):
log(LOG_ERROR, f"Cannot find database file at {db_file}");
sys.exit(1);
log(LOG_INFO, f"Using database {db_file}");
connection = sqlite3.connect(db_file); # open database
update_weather_for_sessions(
connection.cursor(),
activity_max_age,
max_station_distance);
connection.commit(); # commit transaction if everything went fine
connection.close(); # close connection
# Meteostat WeatherCondition codes (coco) mapping to AL2 weather codes, which
# are based on the FIT weather definitions:
# 1 Clear => 0 Clear
# 2 Fair => 1 Partly Cloudy
# 3 Cloudy => 22 Cloudy
# 4 Overcast => 2 Mostly Cloudy
# 5 Fog => 8 Fog
# 6 Freezing Fog => 8 Fog
# 7 Light Rain => 16 Light Rain
# 8 Rain => 3 Rain
# 9 Heavy Rain => 17 Heavy Rain
# 10 Freezing Rain => 3 Rain
# 11 Heavy Freezing Rain => 17 Heavy Rain
# 12 Sleet => 20 Light Rain Snow
# 13 Heavy Sleet => 21 Heavy Rain Snow
# 14 Light Snowfall => 18 Light Snow
# 15 Snowfall => 4 Snow
# 16 Heavy Snowfall => 19 Heavy Snow
# 17 Rain Shower => 13 Scattered Showers
# 18 Heavy Rain Shower => 13 Scattered Showers
# 19 Sleet Shower => 13 Scattered Showers
# 20 Heavy Sleet Shower => 21 Heavy Rain Snow
# 21 Snow Shower => 4 Snow
# 22 Heavy Snow Shower => 19 Heavy Snow
# 23 Lightning => 14 Scattered Thunderstorms
# 24 Hail => 12 Hail
# 25 Thunderstorm => 6 Thunderstorms
# 26 Heavy Thunderstorm => 6 Thunderstorms
# 27 Storm => 6 Thunderstorms
# All AL2 Weather Condition Codes, based on FIT weather records definitions:
# 0 Clear
# 1 Partly Cloudy
# 2 Mostly Cloudy
# 3 Rain
# 4 Snow
# 5 Windy
# 6 Thunderstorms
# 7 Wintry Mix
# 8 Fog
# 11 Hazy
# 12 Hail
# 13 Scattered Showers
# 14 Scattered Thunderstorms
# 15 Unknown Precipitation
# 16 Light Rain
# 17 Heavy Rain
# 18 Light Snow
# 19 Heavy Snow
# 20 Light Rain Snow
# 21 Heavy Rain Snow
# 22 Cloudy
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment