Created
November 1, 2019 08:45
-
-
Save weaming/92199f2259349ff20d032aa7ff04e10f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import arrow | |
from datetime import datetime | |
def unify_date(text: str):
    """Best-effort normalisation of a loosely formatted date string.

    The input is stripped, lower-cased and relieved of a leading ``circa``.
    It is then handed to ``arrow.get``; if that raises
    ``arrow.parser.ParserError`` a fixed list of ``strptime`` layouts is
    tried, each with a space, dash and slash as the field separator.

    Note that the return type depends on which path succeeds:

    * ``arrow`` path: whatever ``.date()`` of the parsed Arrow object yields;
    * ``strptime`` path: a ``'%Y-%m-%d'`` formatted string;
    * nothing matched: the cleaned-up input text, unchanged.
    """
    cleaned = text.strip().lower()
    approx_marker = 'circa'
    if cleaned.startswith(approx_marker):
        cleaned = cleaned[len(approx_marker):].strip()

    try:
        return arrow.get(cleaned).date()
    except arrow.parser.ParserError:
        # Not something arrow understands; fall through to strptime.
        pass

    separators = (' ', '-', '/')
    field_layouts = (
        ('%Y',),
        ('%b', '%Y'),
        ('%d', '%m', '%y'),
        ('%d', '%m', '%Y'),
        ('%d', '%b', '%Y'),
        ('%b', '%d', '%Y'),
    )
    # Order matters: every layout is tried with one separator before moving
    # on to the next separator, and the first successful parse wins.
    patterns = (
        sep.join(fields) for sep in separators for fields in field_layouts
    )
    for pattern in patterns:
        try:
            return datetime.strptime(cleaned, pattern).strftime('%Y-%m-%d')
        except ValueError:
            continue
    return cleaned
# Sample date-of-birth values covering the shapes the script handles:
# bare years, month/year, full dates, trailing periods, a "circa" prefix
# and "<start> to <end>" ranges.
data = [
    {"dob": "1975"},
    {"dob": "1965."},
    {"dob": "Oct 1960"},
    {"dob": "Sep 1938."},
    {"dob": "03/11/1957"},
    {"dob": "02 Aug 1984"},
    {"dob": "05 Jan 1967."},
    {"dob": "1975 to 1978."},
    {"dob": "circa 07 Jul 1966"},
    {"dob": "01 Jan 1979 to 31 Dec 1979"},
    {"dob": "26 Sep 1946 to 07 Dec 1946."},
]

for x in data:
    # Trim trailing periods/spaces, then keep only the start of a range.
    # Split on the standalone word ' to ' rather than the bare substring
    # 'to', which would also cut inside words such as "october".
    text = x['dob'].strip('. ').lower().split(' to ')[0].strip()
    print(unify_date(text))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment