Created
April 26, 2014 17:23
-
-
Save rch/11325830 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
from collections import OrderedDict | |
class Name(object):
    """A personal name held as first, middle and last components.

    Components are parsed from a raw string by ``__init__``.  Assigning to
    ``first``, ``middle`` or ``last`` never shortens a component: each setter
    keeps whichever of the current and the new value is longer, which lets
    several partial spellings of one name be merged via ``update``.
    """

    def __init__(self, raw):
        """Parse *raw* into name components.

        Accepted shapes (tokens are separated by spaces and/or a comma):

        * ``None``                 -> all components empty
        * ``"First"``              -> first only
        * ``"First Last"``         -> first and last
        * ``"First Middle Last"``  -> all three (extra tokens are ignored)
        * ``"Last, First Middle"`` -> comma form, last name leads

        Raises:
            AssertionError: if *raw* contains a hyphen (not supported).
            Exception: if *raw* starts with a comma.
        """
        self._first = ''
        self._middle = ''
        self._last = ''
        if raw is None:
            return

        # names are really more complicated, for example: hyphenated names
        # are not handled by this parser, so they are rejected outright.
        assert raw.find('-') == -1

        comma_at = raw.find(',')
        if comma_at == 0:
            # A leading comma leaves nothing to use as the last name.
            raise Exception(raw)

        # Drop empty tokens: ", " and repeated spaces make re.split emit ''
        # entries which would otherwise shift every field by one position.
        tokens = [tok for tok in (piece.strip()
                                  for piece in re.split(r',| ', raw)) if tok]
        count = len(tokens)
        # Pad so the positional unpacking below always finds three slots.
        tokens.extend([''] * (3 - count))

        if comma_at == -1:
            # without delimiter
            if count == 2:
                self._first, self._last = tokens[0], tokens[1]
            else:
                self._first, self._middle, self._last = tokens[:3]
        else:
            # with delimiter
            self._last, self._first, self._middle = tokens[:3]

    def update(self, name):
        """Merge the components of *name* into this one via the setters."""
        self.first = name.first
        self.middle = name.middle
        self.last = name.last

    def __str__(self):
        """Return the non-empty components joined by single spaces."""
        return ' '.join(filter(None, (self.first, self.middle, self.last)))

    @property
    def first(self):
        """The first-name component."""
        return self._first

    @first.setter
    def first(self, name):
        # Keep the longer spelling; on equal length the current value wins.
        self._first = max([self._first, name], key=len)

    @property
    def middle(self):
        """The middle-name component."""
        return self._middle

    @middle.setter
    def middle(self, name):
        # Keep the longer spelling; on equal length the current value wins.
        self._middle = max([self._middle, name], key=len)

    @property
    def last(self):
        """The last-name component."""
        return self._last

    @last.setter
    def last(self, name):
        # Keep the longer spelling; on equal length the current value wins.
        self._last = max([self._last, name], key=len)
def ingest(filename='data.txt'):
    """Read ``name:id`` records from *filename* and group them by id.

    The first line of the file is parsed as an integer record count; every
    following line must look like ``name:id`` where ``id`` consists only of
    digits.

    Returns:
        An ``OrderedDict`` mapping each id string to the list of ``Name``
        objects seen for it, in file order.

    Raises:
        AssertionError: if an id is empty or non-numeric, or if the declared
            record count does not match the number of lines that follow.
        ValueError: if the count line is not an integer or a record line
            does not contain exactly one ``:``.
    """
    data = OrderedDict()
    expected = None
    last_index = 0
    with open(filename) as f:
        for num, line in enumerate(f):
            if num == 0:
                # Header line: declared number of records.
                expected = int(line)
                continue
            name, ident = line.strip().split(':')
            assert len(ident) > 0 and ident.isdigit()
            entry = Name(name)
            # setdefault groups by id without a catch-all except clause.
            data.setdefault(ident, []).append(entry)
            last_index = num
    # NOTE(review): the original listing lost its indentation; this check is
    # placed after the loop, where the final line index equals the number of
    # record lines -- confirm against the original file.
    assert expected == last_index
    return data
if __name__ == '__main__':
    # For every id, fold all of its name variants into one Name (each
    # component keeps its longest spelling) and print it as "name:id".
    for ident, entries in ingest().items():
        merged = Name(None)
        for entry in entries:
            merged.update(entry)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('{}:{}'.format(merged, ident))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment