Skip to content

Instantly share code, notes, and snippets.

@rcyeh
Created September 6, 2020 17:19
Show Gist options
  • Save rcyeh/80edda7a6f8286b4f2ed4197c9e2a31c to your computer and use it in GitHub Desktop.
Save rcyeh/80edda7a6f8286b4f2ed4197c9e2a31c to your computer and use it in GitHub Desktop.
Convert NYC 2020-2021 School Year Calendar to csv for import to Google Calendar
# Step 1: download the published calendar page.
curl https://www.schools.nyc.gov/about-us/news/2020-2021-school-year-calendar > raw.html
# Step 2: reduce the page's HTML table to tab-separated text, one row per line.
# Requires GNU sed: -z splits input on NUL instead of newline, so the whole
# page is handled as one record and the expressions can match across lines.
# The expressions run in this order:
#   - delete every CR/LF character
#   - drop everything up to and including the last "<table>" (".*" is greedy)
#   - drop everything from the first remaining "</table>" to the end
#   - remove a space that sits directly before a tag
#   - remove <strong> and </strong> tags
#   - turn each "</tr>", plus any </thead>, </tbody>, <tbody> or <tr> tags
#     that immediately follow it, into a newline (row separator)
#   - turn each cell boundary (</th> or </td> followed by <th> or <td>)
#     into a tab (column separator)
#   - strip all remaining tags
#   - decode entities: &ndash; -> "-", &nbsp; -> " ", &rsquo; -> "'",
#     and delete &shy; (soft hyphen)
# No comments can go inside the quoted sed program without changing it, which
# is why the explanation lives up here.
cat raw.html | sed -z -e " \
s/[\r\n]//g;
s/.*<table>//;
s/<\/table>.*//;
s/ </</g;
s/<\/*strong>//g;
s/<\/tr>\(<\(\/thead\|\/tbody\|tbody\|tr\)>\)*/\n/g;
s/<\/t[hd]><t[hd]>/\t/g;
s/<[^>]*>//g;
s/\&ndash;/-/g;
s/\&nbsp;/ /g;
s/\&rsquo;/'/g;
s/\&shy;//g;
" > calendar.txt
# Step 3: convert the tab-separated rows into CSV for Google Calendar import.
python calendar_filter.py calendar.txt > calendar.csv
"""
calendar_filter.py
Converts tab-separated calendar data to csv for import to Google Calendar
"""
import datetime
import fileinput
# Month names that appear in the school-year calendar (September through June).
_MONTH_NUMBERS = {
    'September': 9, 'October': 10, 'November': 11, 'December': 12,
    'January': 1, 'February': 2, 'March': 3,
    'April': 4, 'May': 5, 'June': 6,
}


def convert_date(datestring, increment_day=False, start_year=2020):
    """Convert a calendar date cell to a (start, end) pair of date strings.

    Accepted forms of *datestring*:

    * ``"Month D"`` - a single day; start and end are the same date.
    * ``"Month D-D"`` or ``"Month D-Month D"`` - a range; the end is pushed
      one day later (see the workaround note below).
    * anything with neither a hyphen nor a space (e.g. the table's header
      cell) - returns the CSV header pair ``('Start Date', 'End Date')``.

    Dates are formatted ``MM/D/YYYY`` (two-digit month, unpadded day).
    September-December fall in *start_year*; January-June in the year after.

    Args:
        datestring: Text of the date cell.
        increment_day: When true, a single-day *datestring* is shifted one
            day later. Used internally for the end of a range.
        start_year: Calendar year in which the school year begins.

    Returns:
        A 2-tuple of strings ``(start_date, end_date)``.

    Raises:
        KeyError: The month name is not one of September-June.
        ValueError: The text does not split into the expected parts, or the
            day is not a valid day of that month.
    """
    datestring = datestring.strip()
    if '-' in datestring:
        # Tolerate spaces around the hyphen ("February 15 - 19").
        start, end = (part.strip() for part in datestring.split('-'))
        if ' ' not in end:
            # "November 26-27": the end day reuses the start's month.
            end = start.split()[0] + ' ' + end
        return (
            convert_date(start, start_year=start_year)[0],
            convert_date(end, True, start_year)[0],
        )
    if ' ' in datestring:
        month, dom = datestring.split()
        mm = _MONTH_NUMBERS[month]
        yyyy = start_year if mm >= 9 else start_year + 1
        day = datetime.date(yyyy, mm, int(dom))
        if increment_day:
            # Workaround for a Google Calendar quirk: multi-day events are
            # imported with the end shortened by one day. Real date
            # arithmetic keeps the result valid when the range ends on the
            # last day of a month (or year).
            day += datetime.timedelta(days=1)
        mmddyyyy = '{:02d}/{}/{}'.format(day.month, day.day, day.year)
        return (mmddyyyy, mmddyyyy)
    return ('Start Date', 'End Date')
# Links attached to every event. The embedded newline is fine because the
# whole field is wrapped in double quotes.
_EVENT_DESCRIPTION = '"https://www.schools.nyc.gov/calendar\nhttps://www.schools.nyc.gov/about-us/news/2020-2021-school-year-calendar"'

# Read tab-separated rows (date, day of week, subject) from the files named on
# the command line, or stdin, and print one CSV row per input row.
for line in fileinput.input():
    fields = [field.strip() for field in line.rstrip("\r\n").split("\t")]
    if not any(fields):
        # Skip blank lines instead of failing on the unpack below.
        continue
    if len(fields) != 3:
        raise ValueError(
            "%s:%d: expected 3 tab-separated fields, got %d"
            % (fileinput.filename(), fileinput.lineno(), len(fields))
        )
    date, _dow, subject = fields
    start_date, end_date = convert_date(date)
    if subject == "NOTES":
        # The table's header row becomes the CSV header row; convert_date()
        # supplies the matching 'Start Date' / 'End Date' titles.
        subject = "Subject"
        all_day_event = "All Day Event"
        description = "Description"
    else:
        # Quote the subject so commas are safe, and double any embedded
        # quotes so the field stays valid CSV.
        subject = '"' + subject.replace('"', '""') + '"'
        all_day_event = "True"
        description = _EVENT_DESCRIPTION
    print(",".join([subject, start_date, end_date, all_day_event, description]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment