Skip to content

Instantly share code, notes, and snippets.

@snewcomer
Last active January 6, 2023 15:20
Show Gist options
  • Save snewcomer/8785f1c2b715aa78030b5ff7b4557b05 to your computer and use it in GitHub Desktop.
Save snewcomer/8785f1c2b715aa78030b5ff7b4557b05 to your computer and use it in GitHub Desktop.
cal training data
import pandas as pd
from datetime import datetime
# Event titles that are removed outright before counting.
EXCLUDED_SUMMARIES = ('Mid-week Meditation', 'Coffee Chat: Meet our New Hires!')
# Any event whose title contains this text is removed as well.
HOLIDAY_MARKER = "Company Holiday"
# Day zero for purely numeric dtstart values, which are read as a day count
# from this date (presumably spreadsheet serial dates -- confirm against the
# tool that produced cal.csv).
SERIAL_DATE_EPOCH = pd.Timestamp("1899-12-30")


def _drop_unwanted_summaries(cal):
    """Return a copy of *cal* without excluded, holiday, or untitled events."""
    summary = cal['summary']
    keep = (
        summary.notna()
        & ~summary.isin(EXCLUDED_SUMMARIES)
        & ~summary.str.contains(HOLIDAY_MARKER, regex=False, na=False)
    )
    # Copy so later column assignments never write into a slice of the input.
    return cal[keep].copy()


def _parse_dtstart(raw):
    """Parse raw dtstart values, trying three interpretations in order.

    1. Whatever ``pd.to_datetime`` recognises by default.
    2. The explicit year-day-month layout ``%Y-%d-%m``.
    3. A numeric day count from ``SERIAL_DATE_EPOCH``.

    Values that match none of them come back as ``NaT``.
    """
    parsed = pd.to_datetime(raw, errors="coerce")
    year_day_month = pd.to_datetime(raw, format="%Y-%d-%m", errors="coerce")
    serial_days = pd.to_numeric(raw, errors="coerce")
    # Vectorized: NaN day counts become NaT instead of going through a
    # per-row Python lambda.
    from_serial = SERIAL_DATE_EPOCH + pd.to_timedelta(serial_days, unit="D")
    return parsed.fillna(year_day_month).fillna(from_serial)


def _prepare_calendar(cal):
    """Clean *cal* and add a ``weekday`` column (Monday=0 ... Sunday=6).

    The returned frame is sorted by ``dtstart``, newest first.
    """
    cal = _drop_unwanted_summaries(cal)
    cal['dtstart'] = _parse_dtstart(cal['dtstart'])
    # Only an unparseable start date disqualifies a row; blanks in unrelated
    # columns (location, description, ...) must not remove events.
    cal = cal.dropna(subset=['dtstart'])
    # Drop timezone info element-wise so aware and naive values can coexist,
    # then normalise the dtype so the ``.dt`` accessor is always available.
    cal['dtstart'] = pd.to_datetime(
        cal['dtstart'].apply(lambda ts: ts.replace(tzinfo=None))
    )
    cal = cal.reset_index(drop=True)
    cal['weekday'] = cal['dtstart'].dt.weekday
    return cal.sort_values(by='dtstart', ascending=False)


def main(path='cal.csv'):
    """Print and return the number of events per weekday in the CSV at *path*.

    The CSV must provide ``summary`` and ``dtstart`` columns.  The result is
    a Series indexed by weekday number (Monday=0) holding event counts.
    """
    cal = _prepare_calendar(pd.read_csv(path))
    # Optional export of the cleaned rows:
    # cal.to_csv("out.csv", columns=['dtstart', 'weekday', 'summary'])
    gr = cal.groupby(cal['weekday'])['dtstart'].count()
    print(gr)
    return gr


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment