Skip to content

Instantly share code, notes, and snippets.

@wvandeweyer
Created November 4, 2023 20:26
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wvandeweyer/e5aeaa5fa486549fbb1a1b7be9fdd692 to your computer and use it in GitHub Desktop.
Save wvandeweyer/e5aeaa5fa486549fbb1a1b7be9fdd692 to your computer and use it in GitHub Desktop.
import pandas as pd
import pytz
from datetime import datetime
from pytz.exceptions import AmbiguousTimeError
from tqdm import tqdm
# Read the CSV file - Replace fluviusdata with filename
df = pd.read_csv('fluviusdata.csv', delimiter=';')
# Replace commas with periods in the 'Volume' column
df['Volume'] = df['Volume'].str.replace(',', '.')
# Initialize a set to keep track of previous UTC times
previous_utc_times = set()
# Function to handle the daylight saving time change
def handle_dst(row):
local_time = datetime.strptime(f"{row['Van Datum']} {row['Van Tijdstip']}", "%d-%m-%Y %H:%M:%S")
brussels = pytz.timezone('Europe/Brussels')
# Check if this is the ambiguous 2 AM hour on the day of DST change
if local_time.hour == 2 and local_time.month == 10 and (local_time.day == 29 or local_time.day == 30):
try:
# First try to localize assuming DST (is_dst=True)
local_dt = brussels.localize(local_time, is_dst=True)
utc_dt = local_dt.astimezone(pytz.utc)
utc_str = utc_dt.strftime("%Y-%m-%d %H:%M:%S")
# If the UTC time already exists, it means we've hit the second instance of 2 AM, hence we re-localize as non-DST
if utc_str in previous_utc_times:
local_dt = brussels.localize(local_time, is_dst=False)
utc_dt = local_dt.astimezone(pytz.utc)
# Add the UTC time to the set to keep track
previous_utc_times.add(utc_dt.strftime("%Y-%m-%d %H:%M:%S"))
return utc_dt.strftime("%Y-%m-%d %H:%M:%S")
except AmbiguousTimeError:
# In case of an error, assume standard time
local_dt = brussels.localize(local_time, is_dst=False)
utc_dt = local_dt.astimezone(pytz.utc)
previous_utc_times.add(utc_dt.strftime("%Y-%m-%d %H:%M:%S"))
return utc_dt.strftime("%Y-%m-%d %H:%M:%S")
else:
# For non-ambiguous times, just localize without DST assumptions
local_dt = brussels.localize(local_time, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc)
return utc_dt.strftime("%Y-%m-%d %H:%M:%S")
# Function to convert volume to Wh and replace nulls with zero
def handle_volume(volume):
if pd.isnull(volume) or volume == '':
return 0
else:
return int(float(volume) * 1000)
# Function to map register values
def map_register(register):
register_map = {
'Afname Nacht': 'C',
'Afname Dag': 'C',
'Injectie Dag': 'I',
'Injectie Nacht': 'I'
}
return register_map.get(register, register)
# Function to process each row
def process_row(row):
row['UTC Timestamp'] = handle_dst(row)
row['Volume'] = handle_volume(row['Volume'])
row['Register'] = map_register(row['Register'])
return row
# Apply the processing function to each row with a progress bar
tqdm.pandas() # Enable progress_apply in pandas with tqdm
processed_df = df.progress_apply(process_row, axis=1)
# Select only the desired columns
final_df = processed_df[['UTC Timestamp', 'Register', 'Volume']]
# Write the final DataFrame to a new CSV file
final_df.to_csv('fluviusdata_converted.csv', index=False, sep=';')
print("Conversion completed. The output is saved in 'fluviusdata_converted.csv'.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment