Skip to content

Instantly share code, notes, and snippets.

@Ffisegydd
Created September 14, 2014 16:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ffisegydd/9fabe9d6f91ad3ae0e18 to your computer and use it in GitHub Desktop.
Save Ffisegydd/9fabe9d6f91ad3ae0e18 to your computer and use it in GitHub Desktop.
import requests
import bs4
from datetime import date, timedelta
# Step used to advance the crawl: exactly one calendar day.
day = timedelta(1)

# The crawl stops before this date (the day the script is run).
today = date.today()

# First day to fetch. The original also carried a commented-out alternative
# start of date(year=2010, month=10, day=15).
d = date(2014, 9, 11)

# URL template for a single day's transcript (the "0-24" suffix is part of the
# path as written). The "6" segment is presumably the chat room id -- TODO
# confirm. Filled in with str.format(year=..., month=..., day=...).
transcript_url = 'http://chat.stackoverflow.com/transcript/6/{year}/{month}/{day}/0-24'
def parse_monologue(mono, message_date=None):
    """Flatten one transcript ``monologue`` element into message dicts.

    Args:
        mono: Parsed monologue element (a bs4 tag). The last entry of its
            ``class`` attribute must end in ``-<integer user id>`` and it
            must contain an element with class ``username``.
        message_date: Value stored under each message's ``'date'`` key.
            When omitted, the module-level ``d`` is used, which is what the
            function always did before this parameter existed.

    Returns:
        A list with one dict per ``message`` element found in ``mono``, in
        document order. Each dict has the keys ``user_id``, ``user_name``,
        ``date``, ``id`` (integer suffix of the element's ``id`` attribute)
        and ``message`` (inner HTML of the message's ``content`` element with
        surrounding whitespace stripped, or ``''`` if there is no such
        element).
    """
    if message_date is None:
        # Backward-compatible fallback for callers that pass only ``mono``.
        message_date = d

    user_id = int(mono['class'][-1].split('-')[-1])
    user_name = mono.find(attrs={'class': 'username'}).text

    out = []
    for message in mono.findAll(attrs={'class': 'message'}):
        content = message.find(attrs={'class': 'content'})
        # The previous code sliced str(content)[22:7]; with start > stop that
        # slice is always empty, so every message body was lost. Ask bs4 for
        # the inner HTML instead of relying on hard-coded character offsets.
        if content is None:
            body = ''
        else:
            body = content.decode_contents().strip()
        out.append(dict(
            user_id=user_id,
            user_name=user_name,
            date=message_date,
            id=int(message['id'].split('-')[-1]),
            message=body,
        ))
    return out
# Fetch and parse one transcript page per day, from the start date ``d`` up
# to (but not including) ``today``.
while d < today:
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(
        transcript_url.format(day=d.day, month=d.month, year=d.year),
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed on this machine.
    soup = bs4.BeautifulSoup(r.text, 'html.parser')
    transcript = soup.find('div', {'id': 'transcript'})

    if transcript is None:
        # No transcript container on the page: treat the day as empty rather
        # than crashing with AttributeError on None.
        messages = []
    else:
        messages = [
            message
            for monologue in transcript.findAll(attrs={'class': 'monologue'})
            for message in parse_monologue(monologue)
        ]
    # NOTE(review): ``messages`` is rebound on every iteration and is not
    # consumed anywhere in the visible code -- confirm where each day's
    # result is meant to be stored.

    d += day
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment