Skip to content

Instantly share code, notes, and snippets.

@clavery
Created July 27, 2015 04:17
Show Gist options
  • Save clavery/6b2fdbbe1b8e13e4df08 to your computer and use it in GitHub Desktop.
Save clavery/6b2fdbbe1b8e13e4df08 to your computer and use it in GitHub Desktop.
Python fuzzy matching of free-form input to guess US state names
from difflib import SequenceMatcher
# Two-letter abbreviation -> full name, covering the US states, the District
# of Columbia, and several territories / associated states.
# Entry order is kept stable because callers may iterate over the values.
states = {
    "AL": "Alabama",
    "AK": "Alaska",
    "AS": "American Samoa",
    "AZ": "Arizona",
    "AR": "Arkansas",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "DC": "District Of Columbia",
    "FM": "Federated States Of Micronesia",
    "FL": "Florida",
    "GA": "Georgia",
    "GU": "Guam",
    "HI": "Hawaii",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "IA": "Iowa",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine",
    "MH": "Marshall Islands",
    "MD": "Maryland",
    "MA": "Massachusetts",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MS": "Mississippi",
    "MO": "Missouri",
    "MT": "Montana",
    "NE": "Nebraska",
    "NV": "Nevada",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NY": "New York",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "MP": "Northern Mariana Islands",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PW": "Palau",
    "PA": "Pennsylvania",
    "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont",
    "VI": "Virgin Islands",
    "VA": "Virginia",
    "WA": "Washington",
    "WV": "West Virginia",
    "WI": "Wisconsin",
    "WY": "Wyoming",
}
def guess_state(val):
    """Guess which entry of ``states`` the text ``val`` refers to.

    The input is stripped of surrounding whitespace and upper-cased. If the
    result is one of the two-letter keys of ``states`` it is resolved
    directly, so ``"NH"``, ``"nh"`` and ``" Nh "`` all resolve the same way.
    Otherwise the input is compared against every name in ``states`` with
    ``difflib.SequenceMatcher`` and the name with the highest similarity
    ratio is chosen.

    Args:
        val: A two-letter abbreviation or a (possibly misspelled or
            abbreviated) state name.

    Returns:
        The upper-cased name of the chosen entry. There is no similarity
        threshold, so a name is returned even when the best match is poor.

    Raises:
        AttributeError: If ``val`` is not a string (e.g. ``None``).
    """
    query = val.strip().upper()
    if query in states:
        # Abbreviation match: no fuzzy comparison needed.
        return states[query].upper()

    def _is_junk(ch):
        # Spaces, tabs and apostrophes in the candidate name are treated as
        # junk by SequenceMatcher.
        return ch in " \t'"

    best_name = None
    best_ratio = -1.0
    for name in states.values():
        candidate = name.upper()
        # Keep the argument order (input first, candidate second): the junk
        # filter applies to the second sequence only.
        ratio = SequenceMatcher(_is_junk, query, candidate).ratio()
        # ">=" lets the later of two equally similar names win, which is the
        # same tie-break as sorting by ratio and taking the last element.
        if ratio >= best_ratio:
            best_ratio = ratio
            best_name = candidate
    return best_name
if __name__ == "__main__":
    # Demo run: a misspelled name, a two-letter abbreviation, and an
    # abbreviated name. Guarded so importing this module does no work, and
    # printed so the guesses are visible when run as a script.
    for sample in ("minnesoda", "NH", "n.hampshire"):
        print("%r -> %s" % (sample, guess_state(sample)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment