Skip to content

Instantly share code, notes, and snippets.

@hodbby
Created March 7, 2012 14:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hodbby/1993415 to your computer and use it in GitHub Desktop.
Save hodbby/1993415 to your computer and use it in GitHub Desktop.
regex
def find_year(filename):
    """Return every year announced in a baby-names HTML page.

    Scans the file for text of the form ``>Popularity in <digits><`` and
    collects each run of digits.

    Args:
        filename: Path of the HTML file to scan.

    Returns:
        A list of year strings in document order, e.g. ``['2006']``.
        The list is empty when no such heading is present.
    """
    # Imported locally so the function is usable even though no module-level
    # import of ``re`` is visible in this file.
    import re

    # Plain 'r' replaces the deprecated 'rU' flag, which Python 3.11+ rejects;
    # newline translation is irrelevant to this pattern. The with-block
    # guarantees the handle is closed even if read() raises.
    with open(filename, 'r') as html_file:
        return re.findall(r'>Popularity in (\d+)<', html_file.read())
def find_name_and_rank(filename):
    """Extract the rank/name table rows from a baby-names HTML page.

    A row is recognised by the literal markup
    ``<tr align="right"><td>RANK</td><td>NAME</td><td>NAME</td>``.

    Args:
        filename: Path of the HTML file to scan.

    Returns:
        A list of 3-tuples of strings ``(rank, second_cell, third_cell)`` in
        document order. Elsewhere in this file the second cell is treated as
        the male name and the third as the female name.
    """
    # Imported locally so the function is usable even though no module-level
    # import of ``re`` is visible in this file.
    import re

    # ``(\d+)`` captures the whole rank. The previous ``([\d+])`` was a
    # character class matching exactly one character, so every row with a
    # rank of 10 or more was silently dropped.
    row_pattern = (
        r'<tr align="right"><td>(\d+)</td><td>(\w+)</td><td>(\w+)</td>'
    )
    # The with-block actually closes the file; the old ``file2.close``
    # (no parentheses) never did.
    with open(filename, 'r') as html_file:
        return re.findall(row_pattern, html_file.read())
def convert_tuple_to_unisex_list(tuple_name_and_rank):
    """Flatten rank rows into a single sorted list of (name, rank) pairs.

    Args:
        tuple_name_and_rank: Re-iterable sequence of rows indexed as
            ``(rank, male_name, female_name)``.

    Returns:
        A list of ``(name, rank)`` tuples covering both name columns, in
        ascending tuple order (by name, then by rank as a string).
    """
    # Keying by name means a name repeated within one column keeps only the
    # rank from its last row.
    rank_by_male_name = {row[1]: row[0] for row in tuple_name_and_rank}
    rank_by_female_name = {row[2]: row[0] for row in tuple_name_and_rank}

    # A name present in both columns contributes two entries, one per column.
    combined = list(rank_by_male_name.items())
    combined += list(rank_by_female_name.items())
    return sorted(combined)
def extract_names(filename):
    """Return the sorted (name, rank) pairs found in a baby-names HTML file.

    Args:
        filename: Path of the HTML file to parse.

    Returns:
        A list of ``(name, rank)`` string tuples taken from both name
        columns and sorted by name, then rank, e.g.
        ``[('Aaliyah', '91'), ('Aaron', '57'), ('Abagail', '895'), ...]``.
        The year is NOT part of the result; call find_year() for it.
    """
    # Pull the raw (rank, male name, female name) rows out of the page...
    rows = find_name_and_rank(filename)
    # ...then merge both name columns into one sorted list.
    return convert_tuple_to_unisex_list(rows)
def main():
    """Command-line entry point.

    Usage: ``[--summaryfile] file [file ...]``

    For each file, looks up the year list and the sorted (name, rank) list.
    Without ``--summaryfile`` they are printed to stdout as a blank line
    followed by ``<years> <names>``. With ``--summaryfile`` they are appended
    to ``result.txt`` in the current directory, one per line.

    Exits with status 1 after printing the usage text when no file is given.
    """
    # Imported locally so the function is usable even though no module-level
    # import of ``sys`` is visible in this file.
    import sys

    args = sys.argv[1:]
    summary = bool(args) and args[0] == '--summaryfile'
    if summary:
        args = args[1:]

    # Checked after the flag is stripped so that a lone '--summaryfile'
    # prints the usage text instead of raising IndexError.
    if not args:
        print('usage: [--summaryfile] file [file ...]')
        sys.exit(1)

    # Every listed file is processed, as the usage string advertises.
    for filename in args:
        years = find_year(filename)
        names = extract_names(filename)
        if summary:
            # The with-block really closes the file; the old
            # ``file_output.close`` (no parentheses) never did.
            with open('result.txt', 'a') as file_output:
                file_output.write('%s\n%s\n' % (years, names))
        else:
            # Single-argument print() emits the same bytes on Python 2 and 3
            # as the original pair of print statements.
            print('\n%s %s' % (years, names))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment