Skip to content

Instantly share code, notes, and snippets.

@pipitone
Created September 25, 2016 17:38
Show Gist options
  • Save pipitone/7eefdfbc66ab79506e3a9ec51eb11ee8 to your computer and use it in GitHub Desktop.
Create weekly summary pages of all classes and required prep by scraping MedTech
#!/usr/bin/env python
"""
Create medtech weekly summary pages
This utility fetches the medtech calendar feed, and then visits each event
page to scrape the required prep and other resources.
Usage:
mt-summary.py [options] [<date>]
Options:
--user USER
--pass PASS
--ical URL [default: http://meds.queensu.ca/central/calendars/2020.ics]
--pre-post-week Emit pages for the week, previous and following week
--link-index-html Link index.html to the current summary page
"""
from bs4 import BeautifulSoup
from dominate.tags import *
import docopt
import collections
import datetime
import dateutil.parser
import dominate
import getpass
import icalendar as ical
import os
import os.path
import requests
import time
# SITE_BASE and MT_BASE are base urls for the summary page site and medtech,
# respectively.
MT_BASE = "http://meds.queensu.ca/central"
SITE_BASE = "http://jon.pipitone.ca/medtech"
def create_week_summary_page(ical_data, login, date):
    """Build and write the HTML summary page for the week containing `date`.

    We expect an ical feed (raw text), medtech login details (a dict of POST
    form fields), and a datetime used to determine the week of interest.

    Writes '<monday-date>.html' into the current directory and returns that
    filename.
    """
    # compute the dates for the start and end of the week
    # (we don't assume the date given was the start of the week)
    now = date.replace(hour=0, minute=0, second=0, microsecond=0)
    # back up to Monday 00:00; end is Saturday 00:00 (weekdays only)
    start = now - datetime.timedelta(days=now.weekday())
    end = start + datetime.timedelta(days=5)
    # fetch events for the week and organize them by day
    weekday_events = collections.defaultdict(list)
    for event in ical.Calendar.from_ical(ical_data).walk("VEVENT"):
        # Drop the timezone so the comparison against our naive start/end
        # datetimes works.
        # NOTE(review): icalendar's decoded('dtstart') returns a plain
        # datetime.date (no tzinfo, no .replace(tzinfo=...)) for all-day
        # events, which would raise here -- confirm the feed only contains
        # timed events.
        event_date = event.decoded('dtstart').replace(tzinfo=None)
        if event_date < start or event_date > end:
            continue
        weekday_events[event_date.date()].append(event)
    # construct the summary webpage
    outputfile = '{}.html'.format(start.date())
    _html = dominate.document(title="Summary of week {}".format(start.date()))
    _html.head.add(base(href=MT_BASE))
    # steal css/script links from the medtech dashboard so that styling works
    page = requests.post(MT_BASE + '/dashboard', data=login)
    soup = BeautifulSoup(page.text, 'html.parser')
    for link in soup.find_all('link'):
        attrs = link.attrs
        # only keep stylesheet <link> tags
        if 'type' not in attrs or attrs['type'] != 'text/css':
            continue
        # make fully-qualified URLs for the stylesheets since they have a
        # different base
        _html.head.add(dominate.tags.link(
            href='https://meds.queensu.ca' + attrs['href'],
            media='media' in attrs and attrs['media'] or None,
            rel='stylesheet',
            type=attrs['type']))
    _html.head.add(script(type="text/javascript",
        src="/central/javascript/jquery/jquery.min.js?release=4.6.0.0"))
    # some custom styling
    _html.head.add(style("body { margin: auto 10%; }", type="text/css"))
    _html.head.add(meta(charset="utf-8"))
    _body = _html.body
    # a warning message
    with _body.add(div(style="margin: 10px; margin-bottom: 30px; "
            "padding: 15px 5px 2px 15px; font-size: small; line-height: 1em; "
            "background-color: rgba(255,255,25,0.1); border: thin dashed orange;")):
        p("Last Updated: {}".format(datetime.datetime.now()))
        p("Don't trust any of this. If you fail medical school because you trust "
            "this, it's not on me. :D")
    # links for navigation from week to week
    with _body.add(div(style="overflow:hidden")):
        div(a("<< prev week", href=SITE_BASE + "/{}.html".format((start - datetime.timedelta(weeks=1)).date())),
            style="float:left;")
        div(a("next week >>", href=SITE_BASE + "/{}.html".format((start + datetime.timedelta(weeks=1)).date())),
            style="float:right;")
    # finally, create the content: one <h1> per day, one <h2> per event
    for date in sorted(weekday_events.keys()):
        _body.add(h1(date.strftime("%a, %b %d %Y")))
        _datediv = div(style='padding-left: 10px; margin-bottom: 40px;')
        _body.add(_datediv)
        for event in weekday_events[date]:
            # event heading with an external-link icon out to the medtech page
            _datediv.add(h2(
                event.decoded('summary'),
                a(
                    img(src='http://upload.wikimedia.org/wikipedia/commons/6/64/Icon_External_Link.png'),
                    href=event['url'], style="font-size: x-small", target="_blank")
                ))
            _eventdiv = div(style='padding-left: 10px;')
            _datediv.add(_eventdiv)
            # fetch the medtech page content for the date
            page = requests.post(event['url'], data=login)
            soup = BeautifulSoup(page.text, 'html.parser')
            # extract the "required preparation" section: the heading plus
            # every sibling element that follows it
            required = soup.find_all("h3", text="Required Preparation")
            if required:
                required.extend([e for e in required[0].next_siblings])
                # NOTE(review): d[0] = ... replaces the "placeholder" child
                # with the scraped markup as one string -- presumably to
                # inject it unescaped; verify dominate's escaping behavior.
                # `unicode` is Python 2 only.
                d = div("placeholder")
                d[0] = "".join(map(unicode, required))
                _eventdiv.add(d)
            # extract the event resources, same placeholder trick
            res = soup.find(id='event-resources-container')
            if res:
                d = div('placeholder')
                d[0] = "".join(map(unicode, res))
                _eventdiv.add(d)
    # inject some sweet javascript that makes headings collapse/expand
    # visibility of their associated content
    js = """
$('h1').each(function(index, element) {
$(this).click(function() {
$(this).next('div').toggle();
});
});
$('h2').each(function(index, element) {
$(this).next('div').hide();
$(this).click(function() {
$(this).next('div').toggle();
});
});
$('.timeframe-heading').each(function(index, element) {
$(this).click(function() {
$(this).next('ul').toggle();
});
})
$('ul.timeframe-during').hide();
$('ul.timeframe-post').hide();
$('ul.timeframe-none').hide();
"""
    _html.body.add(script(js, type="text/javascript"))
    # Google Analytics tracking snippet
    _html.body.add(script("""
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-84357249-1', 'auto');
ga('send', 'pageview');
""", type="text/javascript"))
    # write bytes ('wb' + explicit utf8 encode): Python 2 unicode handling
    pagefile = open(outputfile, 'wb')
    pagefile.write(_html.__unicode__().encode('utf8'))
    pagefile.close()
    return outputfile
def main():
    """Parse command-line options, fetch the ical feed, and emit summary pages.

    Prompts interactively for any credentials not given as options. Writes
    one HTML file per requested week into the current directory; optionally
    symlinks index.html to the current week's page.
    """
    arguments = docopt.docopt(__doc__)
    # Use the supplied date to pick the week of interest; default to today.
    nowdate = (dateutil.parser.parse(arguments['<date>'])
               if arguments['<date>'] else datetime.datetime.now())
    login = {
        'username': arguments['--user'] or raw_input("MEdTech username: "),
        'password': arguments['--pass'] or getpass.getpass(),
        'submit': 'Login',
        'action': 'login'}
    ical_url = arguments['--ical']
    ical_r = requests.get(ical_url)
    # Fail loudly on a bad fetch rather than handing an HTML error page to
    # the ical parser, which would die with a confusing parse error.
    ical_r.raise_for_status()
    ical_data = ical_r.text
    outputfile = create_week_summary_page(ical_data, login, nowdate)
    if arguments['--link-index-html']:
        # lexists(), not exists(): exists() follows the symlink and returns
        # False when the old target page is gone, which would leave the
        # dangling link in place and make os.symlink() below raise OSError.
        if os.path.lexists('index.html'):
            os.remove('index.html')
        os.symlink(outputfile, 'index.html')
    if arguments['--pre-post-week']:
        create_week_summary_page(
            ical_data, login, nowdate - datetime.timedelta(weeks=1))
        create_week_summary_page(
            ical_data, login, nowdate + datetime.timedelta(weeks=1))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment