Skip to content

Instantly share code, notes, and snippets.

@kongmunist
Created February 12, 2024 16:30
Show Gist options
  • Save kongmunist/a4945c339b11d4e953e5e806344e42c8 to your computer and use it in GitHub Desktop.
Save kongmunist/a4945c339b11d4e953e5e806344e42c8 to your computer and use it in GitHub Desktop.
Code to correct Fitbit sleep data using Google Timeline data, as described in andykong.org/blog/glocfitbittzcorrection
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# use long/lat at the end of the day to get the timezone, then correct each sleep date's time to UTC.
from timezonefinder import TimezoneFinder
import datetime
import pytz
# Example: how to get a zone's UTC offset (in hours) from its timezone name.
pacific_now = datetime.datetime.now(pytz.timezone('US/Pacific'))
# Bare expression: only displays a value in an interactive session.
pacific_now.utcoffset().total_seconds()/60/60

# Load the location history. The code below reads the columns
# 'timestamp', 'Longitude' and 'Latitude' from this file.
df = pd.read_csv("cooked/loc.csv")
print(f"Total rows: {df.shape[0]}")

# Assign timezone to each row
obj = TimezoneFinder()
df['timestamp'] = pd.to_datetime(df['timestamp'], format="mixed")
# Timezone name for every coordinate pair.
df['timezone'] = df.apply(lambda x: obj.timezone_at(lng=x['Longitude'], lat=x['Latitude']), axis=1)
# UTC offset of that zone in hours.
# NOTE: the offset is evaluated at the moment the script runs
# (datetime.now), not at the row's own timestamp, so it reflects whatever
# DST state is in force on the day of the run.
df['timezone_offset'] = df.apply(lambda x: datetime.datetime.now(pytz.timezone(x['timezone'])).utcoffset().total_seconds()/60/60, axis=1)
# now do DST offset. Rows are split into two bands by timezone_offset and each
# band is checked against a hard-coded list of DST windows:
#   offset >-8 and <-4  -> checked against the American DST windows
#   offset >-2 and <3   -> checked against the European DST windows
# American DST:
# Sun, Mar 14, 2021 – Sun, Nov 7, 2021
# Sun, Mar 13, 2022 – Sun, Nov 6, 2022
# Sun, Mar 12, 2023 – Sun, Nov 5, 2023
# Sun, Mar 10, 2024 – Sun, Nov 3, 2024
# Euro DST:
# Sun, Mar 27, 2022 – Sun, Oct 30, 2022
# Sun, Mar 26, 2023 – Sun, Oct 29, 2023
# Only the years listed above are covered; a timestamp outside every window
# is never flagged as DST.
american_dst_pairs = [(datetime.datetime(2021,3,14), datetime.datetime(2021,11,7)), (datetime.datetime(2022,3,13), datetime.datetime(2022,11,6)), (datetime.datetime(2023,3,12), datetime.datetime(2023,11,5)), (datetime.datetime(2024,3,10), datetime.datetime(2024,11,3))]
euro_dst_pairs = [(datetime.datetime(2022,3,27), datetime.datetime(2022,10,30)), (datetime.datetime(2023,3,26), datetime.datetime(2023,10,29))]
# for each row with offset >-8 and <-4, check if it's in american DST
# NOTE(review): both bounds are strict, so an offset of exactly -8 or -4 is
# never checked against the American windows — confirm this is intended.
american_rows = df[(df['timezone_offset'] > -8) & (df['timezone_offset'] < -4)]
# True when the timestamp lies strictly between the start and end of any window.
a = american_rows.apply(lambda x: any([x['timestamp'] > y[0] and x['timestamp'] < y[1] for y in american_dst_pairs]), axis=1)
# `a` only has entries for the American subset; the assignment aligns on the
# index, so every other row is left missing (NaN) at this point.
df['dst'] = a
# for each row with offset >-2 and <3, check if it's in euro DST
euro_rows = df[(df['timezone_offset'] > -2) & (df['timezone_offset'] < 3)]
a = euro_rows.apply(lambda x: any([x['timestamp'] > y[0] and x['timestamp'] < y[1] for y in euro_dst_pairs]), axis=1)
# Combine the American flags with the European ones (index-aligned OR).
df['dst'] = df['dst'] | a
# convert dst col to int
df['dst'] = df['dst'].astype(int)
# create timestamp_local: timestamp plus the offset hours, minus one hour for
# rows flagged as DST.
# NOTE(review): the DST hour is subtracted here — confirm the sign convention
# against the input data.
df['timestamp_local'] = df['timestamp'] + pd.to_timedelta(df['timezone_offset'], unit='h') - pd.to_timedelta(df['dst'], unit='h')
# df['timestamp_local'] = df['timestamp'] + pd.to_timedelta(df['timezone_offset'], unit='h')
# import the fitbit sleep data (the code below reads its 'timestamp' column)
sleepfile = "../fitbitimporterdata/cooked/Sleep_sleep.csv"
sleep = pd.read_csv(sleepfile)
sleep['timestamp'] = pd.to_datetime(sleep['timestamp'], format="mixed")

# get the timezone offset for each sleep date
df['date_local'] = df['timestamp_local'].dt.date
# Group the location rows by local date once, so each sleep row is a dict
# lookup instead of a scan over the whole location frame.
# Maps date -> positional row numbers in df.
positions_by_date = df.groupby('date_local').indices

# Make sure the result columns exist even if no sleep row finds a match,
# so later code can rely on them. Existing columns are left untouched.
for column in ('timezone_offset', 'dst'):
    if column not in sleep.columns:
        sleep[column] = np.nan
if 'howclose' not in sleep.columns:
    sleep['howclose'] = pd.Series(pd.NaT, index=sleep.index, dtype='timedelta64[ns]')

for i, row in sleep.iterrows():
    # find closest long/lat to the sleep date. first get only rows with
    # matching date (from local)
    localdate = row['timestamp'].date()
    positions = positions_by_date.get(localdate)
    if positions is not None:
        localrows = df.iloc[positions]
        # Distance in time between every candidate and the sleep timestamp.
        # Kept as a separate Series rather than written into the filtered
        # frame, which avoids assigning to a slice of df.
        gaps = (localrows['timestamp_local'] - row['timestamp']).abs()
        # get the row with the closest time (first one on ties)
        nearest = gaps.argmin()
        sleep.at[i, 'timezone_offset'] = localrows['timezone_offset'].iloc[nearest]
        sleep.at[i, 'dst'] = localrows['dst'].iloc[nearest]
        sleep.at[i, 'howclose'] = gaps.iloc[nearest]
    # progress report every 100 rows
    if i % 100 == 0:
        print(f"Processed {i}/{sleep.shape[0]}")
# how many nan in timezone_offset in sleep
missing_offset = sleep['timezone_offset'].isna()
print(f"Sleep rows without a timezone offset: {missing_offset.sum()}")

# keep only the rows that did get a timezone_offset
sleep2 = sleep[~missing_offset]
unmatched = sleep[missing_offset]

# plot dates, nans in different colors, only after 2021
plt.scatter(sleep2['timestamp'], sleep2['timezone_offset'], c='blue', marker='.')
# Rows without an offset are drawn at y=0 in red.
plt.scatter(unmatched['timestamp'], [0]*unmatched.shape[0], c='red', marker='.')
plt.xlim([datetime.datetime(2021,5,1), max(sleep['timestamp'])])
plt.title("Timezone offset for each sleep date, missing in red")

# remove rows where howclose is > 6 hr. The explicit copy makes sleep2 an
# independent frame, so the column assignments below do not write to a slice
# of `sleep`.
sleep2 = sleep2[sleep2['howclose'] < pd.Timedelta(6, unit='h')].copy()
# Undo the local shift: subtract the offset hours and add back the DST hour.
sleep2['timestamp_utc'] = sleep2['timestamp'] - pd.to_timedelta(sleep2['timezone_offset'], unit='h') + pd.to_timedelta(sleep2['dst'], unit='h')
sleep2['timestamp'] = sleep2['timestamp_utc']

# save
sleep2.to_csv("../fitbitimporterdata/cooked/Sleep_sleep_loctzcorrected_utc.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment