Skip to content

Instantly share code, notes, and snippets.

@jaysoffian
Created March 7, 2012 18:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaysoffian/1995010 to your computer and use it in GitHub Desktop.
Save jaysoffian/1995010 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import sys
import re
def extract_names(path):
    """Parse a name-popularity HTML file into a sorted summary list.

    The file at *path* is scanned line by line for two things:

    * the first line containing ``Popularity in YYYY``, which supplies the
      year, and
    * table rows of the form ``<td>rank</td><td>name</td><td>name</td>``
      (male name first, then female name), each contributing two names
      that share one rank.

    A name seen more than once keeps its numerically smallest rank.

    Returns a list whose first element is the year as a string (``None``
    if no year line was found), followed by ``"name rank"`` strings in
    sorted (string comparison) order.
    """
    # Compile once up front instead of handing the pattern to re.search()
    # on every line.
    year_re = re.compile(r'Popularity in (\d{4})')
    row_re = re.compile(r'<td>(\d+)</td><td>([^<]+)</td><td>([^<]+)</td>')

    year = None
    ranks = {}
    with open(path) as f:
        for line in f:
            # Stop looking for the year once it has been found: the first
            # match wins and later lines go straight to the row pattern.
            if year is None:
                m = year_re.search(line)
                if m:
                    year = m.group(1)
                    # The year line is not also treated as a data row.
                    continue
            m = row_re.search(line)
            if not m:
                continue
            rank = int(m.group(1))
            for name in m.group(2, 3):
                # Record the name if it is new, or if this rank is smaller
                # than the one already stored.
                if name not in ranks or rank < ranks[name]:
                    ranks[name] = rank
    return [year] + sorted("%s %s" % item for item in ranks.items())
def main(args):
    """Summarize each file named in *args*.

    *args* is a list of file paths, optionally containing the flag
    ``--summaryfile``. For every path, the list returned by
    ``extract_names`` is joined with newlines. With the flag present the
    text is written to ``<path>.summary``; otherwise it is printed to
    standard output.

    The list passed in is not modified.
    """
    summarize = '--summaryfile' in args
    # Build a filtered copy instead of calling args.remove(): the caller's
    # list stays intact and every occurrence of the flag is dropped, not
    # just the first.
    paths = [arg for arg in args if arg != '--summaryfile']
    for path in paths:
        summary = '\n'.join(extract_names(path))
        if summarize:
            with open(path + '.summary', 'w') as f:
                f.write(summary + '\n')
        else:
            # Parenthesized single-argument form behaves the same under
            # Python 2 (print statement) and Python 3 (print function).
            print(summary)
# Run as a script: hand everything after the program name to main().
if '__main__' == __name__:
    cli_args = sys.argv[1:]
    main(cli_args)
@jaysoffian
Copy link
Author

Should probably not continue to search for the year once it's been found.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment