Skip to content

Instantly share code, notes, and snippets.

@weaming
Created November 1, 2019 08:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save weaming/92199f2259349ff20d032aa7ff04e10f to your computer and use it in GitHub Desktop.
Save weaming/92199f2259349ff20d032aa7ff04e10f to your computer and use it in GitHub Desktop.
import arrow
from datetime import datetime
# strptime layouts tried, in order, once arrow has rejected the input.
# A bare year contains no separator, so it is listed a single time instead of
# being regenerated (and retried) for every separator.
_FALLBACK_FORMATS = ('%Y',) + tuple(
    sep.join(parts)
    for sep in (' ', '-', '/')
    for parts in (
        ('%b', '%Y'),
        ('%d', '%m', '%y'),
        ('%d', '%m', '%Y'),
        ('%d', '%b', '%Y'),
        ('%b', '%d', '%Y'),
    )
)


def unify_date(text: str):
    """Normalise a loosely formatted date string.

    The input is stripped and lower-cased, and a leading ``circa`` is removed.
    It is then handed to ``arrow.get``; if arrow raises ``ParserError`` the
    text is tried against each layout in ``_FALLBACK_FORMATS`` with
    ``datetime.strptime``, first match wins.

    Args:
        text: The raw date text.

    Returns:
        * the ``.date()`` of the parsed Arrow object when arrow accepts the
          text,
        * a ``'YYYY-MM-DD'`` string when a fallback layout matches,
        * the cleaned-up text unchanged when nothing matches.

    NOTE(review): the return type therefore differs per branch (date object
    vs. string). It is kept as-is for backward compatibility; both render the
    same under ``print``/``str``.
    """
    text = text.strip().lower()
    if text.startswith('circa'):
        text = text[len('circa'):].strip()

    try:
        return arrow.get(text).date()
    except arrow.parser.ParserError:
        # Not something arrow understands; fall through to strptime layouts.
        pass

    for fmt in _FALLBACK_FORMATS:
        try:
            return datetime.strptime(text, fmt).strftime('%Y-%m-%d')
        except ValueError:
            # Layout does not fit this text; try the next one.
            continue

    # Nothing matched: hand back the cleaned text so the caller can see it.
    return text
# Sample inputs covering the shapes this script is meant to handle: bare
# years, month/year, full dates, trailing periods, a "circa" prefix and
# "<start> to <end>" ranges.
data = [
    {"dob": "1975"},
    {"dob": "1965."},
    {"dob": "Oct 1960"},
    {"dob": "Sep 1938."},
    {"dob": "03/11/1957"},
    {"dob": "02 Aug 1984"},
    {"dob": "05 Jan 1967."},
    {"dob": "1975 to 1978."},
    {"dob": "circa 07 Jul 1966"},
    {"dob": "01 Jan 1979 to 31 Dec 1979"},
    {"dob": "26 Sep 1946 to 07 Dec 1946."},
]

for record in data:
    cleaned = record['dob'].strip('. ').lower()
    # For a range keep only the start date. Cut on the standalone word
    # " to " rather than the bare substring "to", which would also cut
    # inside words such as "october".
    range_start = cleaned.partition(' to ')[0].strip()
    print(unify_date(range_start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment