Skip to content

Instantly share code, notes, and snippets.

@mmattax
Created September 24, 2013 15:05
Show Gist options
  • Save mmattax/6686137 to your computer and use it in GitHub Desktop.
Save mmattax/6686137 to your computer and use it in GitHub Desktop.
Parse address components [roughly] out of a string.
#!/usr/bin/python
def _split_city_state_zip(text):
    """Split a "City, ST 12345"-style string into (city, state, zip).

    Two layouts are recognised:

    * "San Francisco, CA 94105" -- the text before the first comma is the
      city, the last whitespace-separated token after it is the zip and
      whatever remains in between is the state.
    * "San Francisco CA 94105" -- the last token is the zip, the one before
      it is the state and everything else is the city.

    With fewer than three tokens and no comma, the tokens are assigned to
    city, state and zip in that order; missing values are empty strings.
    """
    text = text.strip()
    if ',' in text:
        city, rest = text.split(',', 1)
        tokens = rest.split()
        # Assume the zip/postal code is the last token after the comma.
        zip_code = tokens.pop() if tokens else ''
        # Treat everything else after the comma as the state.
        return city.strip(), ' '.join(tokens), zip_code

    tokens = text.split()
    if len(tokens) >= 3:
        zip_code = tokens.pop()
        state = tokens.pop()
        return ' '.join(tokens), state, zip_code

    # Zero, one or two tokens: fill city, state, zip left to right.
    padded = tokens + [''] * (3 - len(tokens))
    return tuple(padded)


def parse_address_str(address):
    """Parse address components [roughly] out of a string.

    The input may be a single line ("street, city state zip") or several
    lines, where the first line is the street address, the last line holds
    the city/state/zip and any lines in between become ``address2`` (joined
    with ", " when there is more than one). Blank lines and surrounding
    whitespace (including "\\r" from Windows line endings) are ignored.

    Returns a dict with the keys 'address', 'address2', 'city', 'state' and
    'zip'. Components that cannot be determined are empty strings. A
    single-line input without a comma cannot be split reliably, so all
    values are left empty in that case, as they are for empty input.
    """
    parts = {
        'address': '',
        'address2': '',
        'city': '',
        'state': '',
        'zip': ''
    }
    if not address:
        return parts

    lines = [line.strip() for line in address.strip().splitlines()]
    lines = [line for line in lines if line]
    if not lines:
        return parts

    if len(lines) == 1:
        # Look for a comma between the street and the city.
        street, comma, remainder = lines[0].partition(',')
        if not comma:
            # We could do some really hacky parsing, but it is better to
            # simply return empty values than to guess.
            return parts
        parts['address'] = street.strip()
    else:
        parts['address'] = lines[0]
        # Empty for two-line input, the middle line for three-line input.
        parts['address2'] = ', '.join(lines[1:-1])
        remainder = lines[-1]

    # At this point remainder is assumed to hold the city, state and zip.
    city, state, zip_code = _split_city_state_zip(remainder)
    parts['city'] = city
    parts['state'] = state
    parts['zip'] = zip_code
    return parts
if __name__ == '__main__':
    import pprint

    # Demo: run the parser over a few sample addresses, in single-line and
    # multi-line form, with and without a comma after the city.
    samples = [
        '123 Main Street, San Francisco CA 94105',
        '123 Main Street, San Francisco, CA 94105',
        """\
123 Main Street
San Francisco CA 94105
""",
        """\
123 Main Street
San Francisco, CA 94105
""",
    ]

    parsed = []
    for sample in samples:
        parsed.append(parse_address_str(sample))

    # Pretty-print the list of parsed dicts with a 4-space indent.
    printer = pprint.PrettyPrinter(indent=4)
    printer.pprint(parsed)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment