Skip to content

Instantly share code, notes, and snippets.

@ptgolden
Created May 26, 2012 02:58
Show Gist options
  • Save ptgolden/2791870 to your computer and use it in GitHub Desktop.
Save ptgolden/2791870 to your computer and use it in GitHub Desktop.
Python validator for Library of Congress's Extended Date/Time Format (Level 1)
import re
def is_valid_edtf(raw_string):
    """Return whether *raw_string* is a valid EDTF date or date interval.

    The string is split on '/'. A string without a slash is checked as a
    single date. A string with exactly one slash is an interval: the start
    may be the literal 'unknown' or a valid single date, and the end may be
    'unknown', 'open', or a valid single date. Anything with more than one
    slash is rejected.
    """
    pieces = raw_string.split('/')

    # More than one '/' can never be a valid interval.
    if len(pieces) > 2:
        return False

    if len(pieces) == 1:
        return validate_single_date(pieces[0])

    start, end = pieces

    # Sentinel words are tested first so they never reach the date validator.
    start_ok = start == 'unknown' or validate_single_date(start)
    if not start_ok:
        return False

    end_ok = end in ('unknown', 'open') or validate_single_date(end)
    return bool(end_ok)
def validate_single_date(date_str):
    """Return True if *date_str* is a single valid EDTF (Level 1) date.

    Any run of trailing '~' and '?' qualifiers is stripped first. The
    remainder must then take one of these shapes:

    * ``YEAR`` -- one to four digits, ``y`` followed by five or more
      digits, or a year whose last one or two digits are ``u``
      (for example ``199u`` or ``19uu``).
    * ``YEAR-MM`` -- a fully known year and a month that is ``uu``,
      01-12, or a season code 21-24.
    * ``YEAR-MM-DD`` -- a fully known year with either ``uu-uu``, or a
      month 01-12 and a day that is ``uu`` or 01-31.

    A single leading '-' marks a negative year. Days are only range
    checked (1-31); they are not checked against the length of the month.

    Args:
        date_str: The candidate date string, without an interval separator.

    Returns:
        bool: True when the string matches one of the shapes above.
    """
    import re  # local import keeps this function self-contained

    date_str = re.sub(r'[~?]*$', '', date_str)

    # A leading '-' is the sign of the year, not a field separator; set it
    # aside before splitting so the year patterns can actually see it.
    sign = ''
    if date_str.startswith('-'):
        sign, date_str = '-', date_str[1:]
    date_parts = date_str.split('-')
    date_parts[0] = sign + date_parts[0]

    # All patterns are applied with fullmatch, so no explicit anchors are
    # needed and a trailing newline cannot slip through.
    unclear_year = re.compile(r'-?\d{0,2}(?:\d{2}|\du|uu)')
    known_year = re.compile(r'-?(?:\d{1,4}|y\d{5,})')
    possibly_unclear = re.compile(r'(?:\d{2}|uu)')
    two_digits = re.compile(r'\d{2}')

    if len(date_parts) == 1:
        year, = date_parts
        return bool(unclear_year.fullmatch(year) or known_year.fullmatch(year))

    if len(date_parts) == 2:
        year, month = date_parts
        if not (known_year.fullmatch(year)
                and possibly_unclear.fullmatch(month)):
            return False
        if month == 'uu':
            return True
        # Only reached for a two-digit month, so int() cannot fail here.
        month_num = int(month)
        # 21-24 are season codes, permitted only in the year-month form.
        return 1 <= month_num <= 12 or 21 <= month_num <= 24

    if len(date_parts) == 3:
        year, month, day = date_parts
        if not known_year.fullmatch(year):
            return False
        if month == 'uu':
            # An unspecified month is only accepted with an unspecified day.
            return day == 'uu'
        if not (two_digits.fullmatch(month)
                and possibly_unclear.fullmatch(day)):
            return False
        if not 1 <= int(month) <= 12:
            return False
        return day == 'uu' or 1 <= int(day) <= 31

    return False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment