Created
September 6, 2020 17:19
-
-
Save rcyeh/80edda7a6f8286b4f2ed4197c9e2a31c to your computer and use it in GitHub Desktop.
Convert NYC 2020-2021 School Year Calendar to csv for import to Google Calendar
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the NYC 2020-2021 school year calendar page, reduce its <table> to
# tab-separated text (one table row per line) in calendar.txt, then run
# calendar_filter.py over that text to produce calendar.csv.
#
# NOTE(review): the trailing " | |" on every line below looks like a
# table-extraction artifact rather than shell or sed syntax -- confirm and
# strip it before running.
#
# sed is invoked with -z (GNU sed: NUL-separated records), so the whole page is
# handled as a single record. The substitutions, in order:
#   1. delete every CR and LF character;
#   2. delete everything up to and including "<table>" (greedy match);
#   3. delete "</table>" and everything after it;
#   4. delete a space that directly precedes "<";
#   5. delete <strong> and </strong> tags;
#   6. replace each "</tr>" plus any immediately following </thead>, </tbody>,
#      <tbody> or <tr> tags with a newline (one output line per row);
#   7. replace each closing-cell/opening-cell tag pair with a tab;
#   8. delete all remaining tags;
#   9-12. four single-character clean-ups. They appear here as literal
#      characters after a backslash; presumably these were HTML entities
#      (en dash, non-breaking space, right single quote, soft hyphen) in the
#      original script -- TODO confirm against the page source.
curl https://www.schools.nyc.gov/about-us/news/2020-2021-school-year-calendar > raw.html | |
cat raw.html | sed -z -e " \ | |
s/[\r\n]//g; | |
s/.*<table>//; | |
s/<\/table>.*//; | |
s/ </</g; | |
s/<\/*strong>//g; | |
s/<\/tr>\(<\(\/thead\|\/tbody\|tbody\|tr\)>\)*/\n/g; | |
s/<\/t[hd]><t[hd]>/\t/g; | |
s/<[^>]*>//g; | |
s/\–/-/g; | |
s/\ / /g; | |
s/\’/'/g; | |
s/\­//g; | |
" > calendar.txt | |
python calendar_filter.py calendar.txt > calendar.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
calendar_filter.py | |
Converts tab-separated calendar data to csv for import to Google Calendar | |
""" | |
import fileinput | |
def convert_date(datestring, increment_day=False, start_year=2020):
    """Convert a calendar date cell into a (start, end) pair of date strings.

    Accepted inputs:

    * ``"Month D"``            -- a single day; start and end are identical.
    * ``"Month D-D"``          -- a range within one month.
    * ``"Month D-Month D"``    -- a range spanning two months.
    * anything without a space or dash (e.g. the ``DATE`` header cell) --
      returns the CSV header labels ``('Start Date', 'End Date')``.

    Dates are formatted as ``MM/D/YYYY`` (two-digit month, unpadded day).
    September through December fall in ``start_year``; January through June
    fall in the following year.

    Args:
        datestring: The text of the date cell.
        increment_day: When true, return the day *after* the parsed date.
            Used for range ends as a workaround for a Google Calendar quirk:
            multi-day events are imported with the end shortened by one day.
        start_year: Calendar year in which the school year begins.

    Returns:
        A 2-tuple of strings ``(start_date, end_date)``.

    Raises:
        KeyError: If the month name is not one of September..June.
        ValueError: If the cell does not have the expected shape, the day is
            not a number, or the day does not exist in that month.
    """
    from datetime import date, timedelta

    # Collapse runs of whitespace (including non-breaking spaces) so that the
    # single-space splits below are reliable.
    datestring = ' '.join(datestring.split())

    if '-' in datestring:
        start, end = (part.strip() for part in datestring.split('-'))
        if ' ' not in end:
            # "February 15-19": the end day inherits the start's month.
            end = start.split(' ')[0] + ' ' + end
        return (convert_date(start, start_year=start_year)[0],
                convert_date(end, True, start_year)[0])

    if ' ' not in datestring:
        return ('Start Date', 'End Date')

    month_name, dom = datestring.split(' ')
    month = {'September': 9, 'October': 10, 'November': 11, 'December': 12,
             'January': 1, 'February': 2, 'March': 3,
             'April': 4, 'May': 5, 'June': 6}[month_name]
    year = start_year if month >= 9 else start_year + 1
    day = date(year, month, int(dom))
    if increment_day:
        # Real date arithmetic so month and year ends roll over correctly
        # (e.g. December 31 -> January 1 of the next year).
        day += timedelta(days=1)
    mmddyyyy = '{:02d}/{}/{}'.format(day.month, day.day, day.year)
    return (mmddyyyy, mmddyyyy)
# Emit one CSV row per tab-separated input line (date, weekday, notes), in the
# column order Subject, Start Date, End Date, All Day Event, Description.
for line in fileinput.input():
    # Strip only the line terminator: a bare rstrip() would also remove the
    # tab in front of an empty last column and break the unpack below.
    fields = line.rstrip("\r\n").split("\t")
    if not any(field.strip() for field in fields):
        continue  # tolerate blank lines in the input
    [date, _dow, subject] = fields
    subject = subject.rstrip()
    [start_date, end_date] = convert_date(date)
    if "NOTES" == subject:
        # Header row of the source table: print the CSV column names instead.
        subject = "Subject"
        all_day_event = "All Day Event"
        description = "Description"
    else:
        # Double any embedded quotes so the quoted field remains valid CSV.
        subject = '"' + subject.replace('"', '""') + '"'
        all_day_event = "True"
        # The \n puts the two links on separate lines inside one quoted field.
        description = '"https://www.schools.nyc.gov/calendar\nhttps://www.schools.nyc.gov/about-us/news/2020-2021-school-year-calendar"'
    print(",".join([subject, start_date, end_date, all_day_event, description]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment