Skip to content

Instantly share code, notes, and snippets.

@korenmiklos
Created December 15, 2013 20:21
Show Gist options
  • Save korenmiklos/7977625 to your computer and use it in GitHub Desktop.
Save korenmiklos/7977625 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import sys
import re
import csv
# Regex fragments for a single event. Concatenated, they match text shaped like
#   [In <a href="URL">FOLDER</a>, ]WHO VERB [the file|folder ]...<a ... title="WHAT">...</a>. WHEN
# The leading "In <a ...>FOLDER</a>, " part is optional, so `url` and `folder`
# may be absent from a match.
where_regex = r"""(In <a href="(?P<url>.*?)">(?P<folder>.*?)</a>, )?"""
who_regex = r"(?P<who>.*?)"
verb_regex = r"(?P<verb>added|edited|deleted|moved|renamed) "
what_regex = r"""(the (file|folder) )?(.*?)<a (.*?) title="(?P<what>.*?)">(.*?)</a>\. """
when_regex = r"(?P<when>.*)$"
EVENT = re.compile(where_regex + who_regex + verb_regex + what_regex + when_regex)

# Events in the input are delimited by <img ...> tags.
SPLIT = re.compile(r"<img .*?>")

# CSV columns; these are exactly the named groups of EVENT.
_FIELDNAMES = ['url', 'folder', 'who', 'verb', 'what', 'when']


def _parse_events(text):
    """Yield a dict of the named EVENT groups for each event found in *text*.

    *text* is split on ``<img ...>`` tags and every resulting chunk is
    searched with EVENT; chunks that do not match are skipped.
    """
    for chunk in SPLIT.split(text):
        match = EVENT.search(chunk)
        if match:
            # The argument of groupdict() is the *default* used for groups
            # that did not participate in the match.  Use an empty string so
            # events without the optional "In <a ...>folder</a>, " prefix get
            # blank `url`/`folder` cells rather than the raw chunk text.
            yield match.groupdict("")


def main(stdin=None, stdout=None):
    """Read event HTML from *stdin* and write one CSV row per event to *stdout*.

    Args:
        stdin: readable text stream; defaults to ``sys.stdin``.
        stdout: writable text stream; defaults to ``sys.stdout``.

    The output starts with a header row (url, folder, who, verb, what, when).
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout

    writer = csv.DictWriter(stdout, fieldnames=_FIELDNAMES)
    writer.writeheader()
    writer.writerows(_parse_events(stdin.read()))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment