Skip to content

Instantly share code, notes, and snippets.

@snewcomer
Created January 8, 2023 15:56
Show Gist options
  • Save snewcomer/c89599076e204787bebf1ca3c1c27171 to your computer and use it in GitHub Desktop.
Save snewcomer/c89599076e204787bebf1ca3c1c27171 to your computer and use it in GitHub Desktop.
Busy in 30 min intervals
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
cal = pd.read_csv('cal.csv')
# clean summary
cal = cal.query("summary not in ('Mid-week Meditation', 'Coffee Chat: Meet our New Hires!', 'PED Lunch & Learn Hold', 'AskPeople')")
cal = cal[cal['summary'].str.contains("Company Holiday") == False]
cal = cal[cal['summary'].str.contains("L&L") == False]
#cal.drop(['summary'], axis=1, inplace=True)
cal = cal.drop_duplicates()
# clean dtstart
parsed = pd.to_datetime(cal["dtstart"], errors="coerce").fillna(pd.to_datetime(cal["dtstart"], format="%Y-%d-%m", errors="coerce"))
ordinal = pd.to_numeric(cal["dtstart"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))
parsed_e = pd.to_datetime(cal["dtend"], errors="coerce").fillna(pd.to_datetime(cal["dtend"], format="%Y-%d-%m", errors="coerce"))
ordinal_e = pd.to_numeric(cal["dtend"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))
cal['dtstart'] = parsed.fillna(ordinal)
cal['dtend'] = parsed_e.fillna(ordinal_e)
# create unique column we can group and resample over
cal['DtCombined'] = cal['dtstart'].astype(str) + '_' + cal['dtend'].astype(str)
# still might have problems, drop Zulu
cal = cal.dropna()
cal['dtstart'] = cal['dtstart'].apply(lambda x: datetime.replace(x, tzinfo=None))
cal['dtend'] = cal['dtend'].apply(lambda x: datetime.replace(x, tzinfo=None))
cal['MeetingLength'] = cal['dtend'] - cal['dtstart']
cal['Date'] = cal['dtstart'].dt.date
cal['Date'] = pd.to_datetime(cal['Date'], errors="coerce")
cal['StartTime'] = cal['dtstart'].dt.time
cal['EndTime'] = cal['dtend'].dt.time
cal = cal.melt(id_vars=['summary', 'Date', 'dtstart', 'dtend', 'DtCombined', 'MeetingLength'], var_name='Start/End', value_name='TimeOfDay')
cal['Busy'] = 1
# conditional create column that we will ultimately resample
cal['DateTime'] = np.where(cal['Start/End'] == 'StartTime', cal['dtstart'], cal['dtend'])
cal.sort_values(by=['dtstart', 'TimeOfDay'], inplace=True)
cal.drop(['dtstart', 'dtend', 'Start/End'], axis=1, inplace=True)
cal = cal.groupby('DtCombined').apply(lambda x: x.drop_duplicates('DateTime').set_index('DateTime').resample('30Min').ffill()).reset_index('DtCombined', drop=True).drop('DtCombined', 1).reset_index()
cal['TimeOfDay'] = cal['DateTime'].dt.time
# resample drops non numeric columns
cal = cal.set_index('DateTime').resample('30Min').mean().reset_index()
cal['Busy'] = cal['Busy'].fillna(0)
cal['Weekday'] = cal['DateTime'].dt.weekday
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment