Skip to content

Instantly share code, notes, and snippets.

Created September 25, 2016 17:38
Show Gist options
  • Save pipitone/7eefdfbc66ab79506e3a9ec51eb11ee8 to your computer and use it in GitHub Desktop.
Save pipitone/7eefdfbc66ab79506e3a9ec51eb11ee8 to your computer and use it in GitHub Desktop.
Create weekly summary pages of all classes and required prep by scraping MedTech
#!/usr/bin/env python
Create medtech weekly summary pages
This utility fetches the medtech calendar feed and then visits each event page
to scrape the required prep and other resources.
Usage: [options] [<date>]
--user USER
--pass PASS
--ical URL [default:]
--pre-post-week Emit pages for the week, previous and following week
--link-index-html Link index.html to the current summary page
from bs4 import BeautifulSoup
from dominate.tags import *
import docopt
import collections
import datetime
import dateutil.parser
import dominate
import getpass
import icalendar as ical
import os
import os.path
import requests
import time
# SITE_BASE and MT_BASE are the base URLs for the summary page site and MedTech,
# respectively.
MT_BASE = ""
def create_week_summary_page(ical_data, login, date):
"""Do the work of creating the summary page
We expect an ical feed, and medtech login details, as well as date used to
determine the week of interest.
# NOTE(review): this copy of the function has lost its indentation, the
# docstring's closing quotes and several statements. The notes below mark each
# visible gap; recover the missing lines from the original gist before running.
# compute the dates for the start and end of the week
# (we don't assume the date given was the start of the week)
# `now` is `date` truncated to midnight; subtracting weekday() (Monday == 0)
# days gives the Monday of that week, and `end` is five days after `start`.
now = date.replace(hour=0, minute=0, second=0, microsecond=0)
start = now - datetime.timedelta(days=now.weekday())
end = start + datetime.timedelta(days=5)
# fetch events for the week and organize them by day
weekday_events = collections.defaultdict(list)
for event in ical.Calendar.from_ical(ical_data).walk("VEVENT"):
# tzinfo is dropped before comparing against start/end
# (presumably because those are naive datetimes -- confirm against the caller)
event_date = event.decoded('dtstart').replace(tzinfo=None)
# NOTE(review): the body of this `if`, and the statement that files the event
# into weekday_events, are missing from this copy (presumably a `continue`
# followed by an append -- confirm).
if event_date < start or event_date > end:
# construct the summary webpage
# NOTE(review): both format(...) calls below are cut off after the opening
# parenthesis, so the output file name and page title arguments are unknown.
outputfile = '{}.html'.format(
_html = dominate.document(title="Summary of week {}".format(
# steal css/script links from the medtech dashboard so that styling works
# NOTE(review): the start of this call is missing; presumably an HTTP request
# to the dashboard URL that submits `login` as form data -- confirm.
page = + '/dashboard', data=login)
soup = BeautifulSoup(page.text, 'html.parser')
for link in soup.find_all('link'):
attrs = link.attrs
# NOTE(review): the body of this `if` is missing (presumably it skips <link>
# tags that are not stylesheets -- confirm).
if 'type' not in attrs or attrs['type'] != 'text/css':
# make fully-qualified URLs for the stylesheets since they have a
# different base
# NOTE(review): the two lines below are keyword arguments of a call whose
# opening line is missing. The and/or expression yields attrs['media'] when
# that key is present and truthy, otherwise None.
href='' + attrs['href'],
media='media' in attrs and attrs['media'] or None,
# some custom styling
_html.head.add(style("body { margin: auto 10%; }", type="text/css"))
_body = _html.body
# a warning message
with _body.add(div(style="margin: 10px; margin-bottom: 30px; "
"padding: 15px 5px 2px 15px; font-size: small; line-height: 1em; "
"background-color: rgba(255,255,25,0.1); border: thin dashed orange;")):
# NOTE(review): the format(...) argument (the timestamp) and the closing
# parentheses are missing here.
p("Last Updated: {}".format(
p("Don't trust any of this. If you fail medical school because you trust "
"this, it's not on me. :D")
# links for navigation from week to week
# Each link targets SITE_BASE + "/<date>.html", where <date> is `start`
# shifted one week back or forward.
# NOTE(review): SITE_BASE is not defined anywhere in this copy, and both div(...)
# calls below are missing their trailing arguments and closing parenthesis.
with _body.add(div(style="overflow:hidden")):
div(a("<< prev week", href=SITE_BASE + "/{}.html".format((start - datetime.timedelta(weeks=1)).date())),
div(a("next week >>", href=SITE_BASE + "/{}.html".format((start + datetime.timedelta(weeks=1)).date())),
# finally, create the content
# NOTE: the loop variable `date` shadows the `date` parameter from here on.
for date in sorted(weekday_events.keys()):
_body.add(h1(date.strftime("%a, %b %d %Y")))
_datediv = div(style='padding-left: 10px; margin-bottom: 40px;')
for event in weekday_events[date]:
# NOTE(review): the start of the call that builds the event's link/heading is
# missing; only its trailing keyword arguments remain.
href=event['url'], style="font-size: x-small", target="_blank")
_eventdiv = div(style='padding-left: 10px;')
# fetch the medtech page content for the date
# NOTE(review): the start of this call is missing; presumably the same kind of
# request as for the dashboard, issued against event['url'] -- confirm.
page =['url'], data=login)
soup = BeautifulSoup(page.text, 'html.parser')
# extract the "required preparation" section
required = soup.find_all("h3", text="Required Preparation")
if required:
# append every sibling that follows the first matching heading
required.extend([e for e in required[0].next_siblings])
# The div is created with a placeholder child, which is then replaced by the
# joined markup (presumably so the scraped HTML is emitted unescaped -- confirm).
# `unicode` is a Python 2 builtin.
# NOTE(review): no statement attaching `d` to the page is visible here.
d = div("placeholder")
d[0] = "".join(map(unicode, required))
# extract the event resources
res = soup.find(id='event-resources-container')
if res:
# same placeholder-replacement technique as above, applied to the children of
# the resources container
d = div('placeholder')
d[0] = "".join(map(unicode, res))
# inject some sweet JavaScript that makes headings collapse/expand
# visibility of their associated content
# NOTE(review): the JavaScript string literals below (the click handlers and
# the Google Analytics snippet) are truncated in this copy, and the `open(...)`
# near the end has no visible write or close before `outputfile` is returned.
js = """
$('h1').each(function(index, element) {
$(this).click(function() {
$('h2').each(function(index, element) {
$(this).click(function() {
$('.timeframe-heading').each(function(index, element) {
$(this).click(function() {
_html.body.add(script(js, type="text/javascript"))
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
ga('create', 'UA-84357249-1', 'auto');
ga('send', 'pageview');
""", type="text/javascript"))
pagefile = open(outputfile, 'wb')
return outputfile
def main():
# Command-line entry point: parse options with docopt, download the iCal feed,
# build the summary page for the requested week, and optionally link
# index.html to it and build the neighbouring weeks.
# NOTE(review): indentation and some statements are missing from this copy;
# the gaps are flagged below.
arguments = docopt.docopt(__doc__)
# Parse the optional <date> argument when one was given.
# NOTE(review): the fallback after `or` is missing (presumably the current
# date/time -- confirm).
nowdate = arguments['<date>'] and dateutil.parser.parse(
arguments['<date>']) or
# Form fields sent along with each MedTech request; the username and password
# are prompted for interactively when not supplied as options.
# `raw_input` is a Python 2 builtin.
login = {
'username': arguments['--user'] or raw_input("MEdTech username: "),
'password': arguments['--pass'] or getpass.getpass(),
'submit': 'Login',
'action': 'login'}
# download the calendar feed as text
ical_url = arguments['--ical']
ical_r = requests.get(ical_url)
ical_data = ical_r.text
outputfile = create_week_summary_page(ical_data, login, nowdate)
if arguments['--link-index-html']:
# remove any existing index.html before symlinking it to the new page
os.path.exists('index.html') and os.remove('index.html')
os.symlink(outputfile, 'index.html')
if arguments['--pre-post-week']:
# NOTE(review): the start of both calls below is missing; presumably
# create_week_summary_page( for the previous and the following week -- confirm.
ical_data, login, nowdate - datetime.timedelta(weeks=1))
ical_data, login, nowdate + datetime.timedelta(weeks=1))
if __name__ == '__main__':
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment