Skip to content

Instantly share code, notes, and snippets.

@mdshw5
Created March 25, 2014 17:53
Show Gist options
  • Save mdshw5/9767374 to your computer and use it in GitHub Desktop.
Save mdshw5/9767374 to your computer and use it in GitHub Desktop.
import re
import urllib2
from datetime import datetime, timedelta

import pytz
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
# Index 5 (the location) is the highest field index read while building an
# event, so a usable row must provide at least this many fields.
_MIN_EVENT_FIELDS = 6


def _clean_text(tag):
    """Return the ASCII-only text of *tag* without tab or carriage-return characters."""
    # Python 2 ``str.translate(None, chars)`` deletes every character in ``chars``.
    return tag.get_text().encode('ascii', 'ignore').translate(None, '\t\r')


def scrape_scical():
    """Scrape the Hopkins science calendar page and return it as iCalendar data.

    Every ``<tr>`` of the page is reduced to a flat list of text fields: the
    text of each ``<strong>`` tag, followed by at most the first four
    non-empty lines collected from the text of the row's ``<br>`` tags.
    One event is built per row from those fields:

    * fields 0 and 2, joined by a space, are parsed as the start date/time
      with the format ``'%A %b %d, %Y %I:%M %p'``;
    * field 1 is the summary;
    * field 5 is the location;
    * fields 3 onward, comma-joined, form the description.

    Each event is given a fixed one-hour duration. The datetimes are naive
    (no timezone is attached), and ``dtstamp`` is set to the event start.

    Rows that yield fewer than six fields (for example rows without any
    ``<strong>``/``<br>`` content) are skipped instead of raising
    ``IndexError``.

    Returns:
        The serialized calendar as produced by ``Calendar.to_ical()``.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
        ValueError: if a row's date/time text does not match the expected
            format.
    """
    response = urllib2.urlopen('http://www.hopkinsmedicine.org/scical/')
    try:
        data = response.read()
    finally:
        # Python 2 urlopen() results are not context managers; close explicitly.
        response.close()
    soup = BeautifulSoup(data)

    cal = Calendar()
    cal.add('prodid', '-//Hopkins Science Calendar//mattshirley.com/scical//')
    cal.add('version', '2.0')

    for row in soup.find_all('tr'):
        fields = [_clean_text(tag) for tag in row.find_all('strong')]

        break_lines = []
        for tag in row.find_all('br'):
            # A single <br> may carry several newline-separated lines of text;
            # keep only the non-empty ones.
            break_lines.extend(
                line for line in re.split('\n+', _clean_text(tag)) if line
            )
        fields.extend(break_lines[:4])

        if len(fields) < _MIN_EVENT_FIELDS:
            # Not an event row (or an incomplete one): nothing to index safely.
            continue

        date_start = datetime.strptime(
            ' '.join([fields[0], fields[2]]), '%A %b %d, %Y %I:%M %p'
        )
        # The page gives no end time that is used here; assume one hour.
        date_end = date_start + timedelta(hours=1)

        event = Event()
        event.add('summary', fields[1])
        event.add('location', fields[5])
        event.add('description', ','.join(fields[3:]))
        event.add('dtstart', date_start)
        event.add('dtend', date_end)
        event.add('dtstamp', date_start)
        cal.add_component(event)

    return cal.to_ical()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment