Skip to content

Instantly share code, notes, and snippets.

@rch
Created April 26, 2014 17:23
Show Gist options
  • Save rch/11325830 to your computer and use it in GitHub Desktop.
Save rch/11325830 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import re
from collections import OrderedDict
class Name(object):
    """A personal name split into first, middle and last components.

    Assigning to ``first``, ``middle`` or ``last`` never shortens a
    component: the longer of the current and the new value is kept (the
    current value wins ties). Merging several spellings of one name via
    ``update`` therefore converges on the most complete form, e.g.
    ``J`` merged with ``John`` gives ``John``.
    """

    def __init__(self, raw):
        """Parse *raw* into name components.

        Accepted layouts are ``First Last``, ``First Middle Last`` and the
        comma-delimited ``Last, First Middle``. ``None`` produces an empty
        name. Empty tokens caused by adjacent delimiters (``", "`` or
        repeated spaces) are ignored, and tokens beyond the third are
        discarded.

        Raises:
            AssertionError: if *raw* contains a hyphen (not supported).
            ValueError: if *raw* starts with a comma, i.e. the last name
                in front of the delimiter is missing.
        """
        self._first = self._middle = self._last = ''
        if raw is None:
            return

        # Names are really more complicated than this parser admits;
        # hyphenated names, for example, are not supported.
        assert '-' not in raw, 'hyphenated names are not supported: %r' % (raw,)

        text = raw.strip()
        comma_at = text.find(',')
        if comma_at == 0:
            raise ValueError(raw)

        # re.split() emits an empty string between adjacent delimiters
        # ("Smith, John" -> ['Smith', '', 'John']); drop those so that the
        # positional assignment below is not shifted by one.
        tokens = [tok for tok in (part.strip() for part in re.split(r',| ', text))
                  if tok]

        if comma_at == -1 and len(tokens) == 2:
            # "First Last": no middle name.
            self._first, self._last = tokens
            return

        # Pad so that three components can always be unpacked.
        tokens.extend([''] * (3 - len(tokens)))
        if comma_at > 0:
            # "Last, First Middle"
            self._last, self._first, self._middle = tokens[:3]
        else:
            # "First Middle Last" (or a single bare token)
            self._first, self._middle, self._last = tokens[:3]

    def update(self, name):
        """Merge *name* into this one, keeping the longer of each component."""
        self.first = name.first
        self.middle = name.middle
        self.last = name.last

    def __str__(self):
        """Return the non-empty components joined as ``First Middle Last``."""
        return ' '.join(filter(None, (self.first, self.middle, self.last)))

    @property
    def first(self):
        """First name; assignment keeps the longer of old and new value."""
        return self._first

    @first.setter
    def first(self, name):
        self._first = max([self._first, name], key=len)

    @property
    def middle(self):
        """Middle name; assignment keeps the longer of old and new value."""
        return self._middle

    @middle.setter
    def middle(self, name):
        self._middle = max([self._middle, name], key=len)

    @property
    def last(self):
        """Last name; assignment keeps the longer of old and new value."""
        return self._last

    @last.setter
    def last(self, name):
        self._last = max([self._last, name], key=len)
def ingest(filename='data.txt'):
    """Read ``name:id`` records from *filename* and group them by id.

    The first line of the file holds the number of record lines that
    follow; every later line has the form ``<name>:<digits>``.

    Returns:
        An OrderedDict mapping each id string to the list of ``Name``
        objects parsed for it, in order of first appearance.

    Raises:
        AssertionError: if the file is empty, an id is empty or not all
            digits, or the declared count differs from the number of
            record lines.
        ValueError: if the first line is not an integer or a record line
            does not contain exactly one ``:``.
    """
    data = OrderedDict()
    declared = None   # record count announced on the first line
    last_index = 0    # index of the last line read == number of records
    with open(filename) as f:
        for num, line in enumerate(f):
            last_index = num
            if num == 0:
                declared = int(line)
                continue
            name, ident = line.strip().split(':')
            assert ident.isdigit(), 'line %d: bad id %r' % (num + 1, ident)
            # setdefault groups by id without a bare ``except`` that would
            # also swallow unrelated errors raised while parsing the name.
            data.setdefault(ident, []).append(Name(name))
    assert declared is not None, '%s is empty' % (filename,)
    assert declared == last_index, (
        'header declares %d records but %d were read' % (declared, last_index))
    return data
if __name__ == '__main__':
    # For every id, fold all recorded spellings into one merged Name and
    # print it as ``<name>:<id>``.
    # ``.items()`` and single-argument ``print(...)`` give the same output
    # on Python 2 and Python 3, unlike ``.iteritems()`` and the print
    # statement.
    for ident, entries in ingest().items():
        merged = Name(None)
        for entry in entries:
            merged.update(entry)
        print('{}:{}'.format(merged, ident))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment