Skip to content

Instantly share code, notes, and snippets.

@nickbarnwell
Last active December 11, 2015 19:38
Show Gist options
  • Save nickbarnwell/4649575 to your computer and use it in GitHub Desktop.
Save nickbarnwell/4649575 to your computer and use it in GitHub Desktop.
Python script for parsing the MSFT S13 Intern Start Date Document
import csv
import string
import sys
import re
# A date header line: a word, a space and a number at the end of the line,
# e.g. "January 14".  Raw strings keep the regex escapes (\d, \(, \)) from
# being read as (invalid) string escape sequences.
RE_DATE = re.compile(r"([a-zA-Z]+) (\d+)$")
# An intern line: "<name> (<comma-separated details>)".
RE_INTERN = re.compile(r"(.+) \((.+)\)")
def parse_intern(line):
    """Parse one intern line of the form ``"<name> (<details>)"``.

    Args:
        line: A line of text that matches RE_INTERN.

    Returns:
        A dict with the intern's ``'name'`` plus every key returned by
        ``parse_data`` for the text inside the parentheses.

    Raises:
        ValueError: If ``line`` does not match RE_INTERN.
    """
    match = RE_INTERN.match(line)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("not an intern line: %r" % (line,))
    name, data = match.group(1, 2)
    intern = {'name': name}
    # dict.update mutates in place and returns None, so it cannot be chained
    # into the return statement.
    intern.update(parse_data(data))
    return intern
def parse_data(data):
    """Parse the comma-separated details found inside an intern's parentheses.

    Args:
        data: Text with at least three comma-separated fields. Any fields
            after the third are ignored.

    Returns:
        A dict with keys ``'position'`` and ``'team'`` (the first two fields,
        whitespace-stripped) and ``'num'`` (the first character of the third
        field converted to ``int``).

    Raises:
        IndexError: If there are fewer than three fields or the third field
            is empty.
        ValueError: If the third field does not start with a digit.
    """
    # A list comprehension with str.strip works on both Python 2 and 3;
    # map(string.strip, ...) needs the Python 2-only string.strip function
    # and a list-returning map().
    fields = [field.strip() for field in data.split(',')]
    return {
        'position': fields[0],
        'team': fields[1],
        # Only the leading character of the third field is interpreted.
        'num': int(fields[2][0]),
    }
def process_interns_file(filename):
    """Read a start-date document and return one record per intern.

    The file is scanned for date header lines (matching RE_DATE).  The run of
    intern lines (matching RE_INTERN) that directly follows a header is
    attributed to that date; the run ends at the first line that is not an
    intern line.  Intern lines outside such a run are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of dicts as produced by ``parse_intern``, each with an added
        ``'start'`` key holding the stripped date header line.
    """
    interns = []
    # Header of the run currently being read, or None when outside a run.
    date = None
    with open(filename, 'r') as f:
        # A single pass avoids manual next() calls, so reaching end-of-file
        # right after a date or intern line no longer raises StopIteration.
        for line in f:
            if date is not None and RE_INTERN.match(line):
                intern = parse_intern(line)
                intern['start'] = date
                interns.append(intern)
            elif RE_DATE.match(line):
                # Also reached when a header directly follows an intern run,
                # so that header is not lost.
                date = line.strip()
            else:
                date = None
    return interns
def output_csv(fname, data):
    """Write intern records to a CSV file with a header row.

    Columns are written in the order name, position, team, num, start; a key
    missing from a record is written as ``N/A``.

    Args:
        fname: Path of the CSV file to create or overwrite.
        data: Iterable of dicts keyed by the column names above.

    Raises:
        ValueError: If a record contains a key that is not one of the columns
            (the ``csv.DictWriter`` default).
    """
    fieldnames = ['name', 'position', 'team', 'num', 'start']
    # The csv module writes its own "\r\n" row endings, so the file must not
    # translate newlines again: newline='' on Python 3, binary mode on
    # Python 2.
    if sys.version_info[0] >= 3:
        f = open(fname, 'w', newline='')
    else:
        f = open(fname, 'wb')
    with f:
        writer = csv.DictWriter(f, fieldnames, restval='N/A')
        writer.writeheader()
        writer.writerows(data)
# Script entry point: convert the input document (first argument) into a CSV
# file (second argument).
if __name__ == '__main__':
    args = sys.argv
    if len(args) < 3:
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print("Usage: python intern_parser.py infile outfile")
    else:
        output_csv(args[2], process_interns_file(args[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment