Last active
August 29, 2015 14:05
-
-
Save SpotlightKid/8c7f73cb8f936dc54c04 to your computer and use it in GitHub Desktop.
Parse ISO-8601 formatted date/time strings with UTC offsets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding:utf-8 -*- | |
"""Parse ISO-8601 formatted date/time strings. | |
Uses only standard library modules and supports UTC offsets. | |
""" | |
__all__ = ("parse_isodate", "UTCOffset") | |
import re | |
from datetime import datetime, timedelta, tzinfo | |
# strptime() formats for the accepted layouts: date only, date with time,
# and date with time including fractional seconds.
ISO_DATE_FMT = '%Y-%m-%d'
ISO_DT_FMT = '%Y-%m-%dT%H:%M:%S'
ISO_DT_MS_FMT = '%Y-%m-%dT%H:%M:%S.%f'

# Matches a UTC offset such as "+01:00" or "-3:0" at the very end of a string.
# Hours and minutes may be given with one or two digits. The first hour digit
# is limited to 0 or 1 (so 0..19 is matched here) and minutes are limited to
# 0..59, so that e.g. "+01:65" is not silently read as an offset of +02:05.
TZ_RX = re.compile(
    r'(?P<sign>[-+])(?P<hours>[01]?\d):(?P<minutes>[0-5]?\d)$')

# Zero-length duration, shared so it does not have to be re-created per call.
TD_ZERO = timedelta(0)
class UTCOffset(tzinfo):
    """Time zone info for a fixed offset from UTC in hours and minutes.

    Does not support Daylight Saving Time (DST) adjustment: the ``dst``
    method always returns a ``datetime.timedelta`` instance of 0 seconds.

    """

    def __init__(self, name, hours=0, minutes=0):
        """Set time zone name and offset.

        ``name`` is the string reported by ``tzname``. ``hours`` and
        ``minutes`` are combined into one offset; pass both as negative
        values for offsets west of UTC.

        Raises a ``ValueError`` if the combined offset lies outside the
        range -12:00 .. +14:00.

        """
        self.name = name
        self.offset = timedelta(hours=hours, minutes=minutes)

        if not -12 <= self.offset.total_seconds() / 3600 <= 14:
            raise ValueError("Time offset not in range -12:00 .. +14:00")

    def __getinitargs__(self):
        """Return the constructor arguments used for pickling and copying.

        The pickling support inherited from ``tzinfo`` re-creates an
        instance by calling the class with the arguments returned here (or
        with no arguments if this method is missing). ``name`` is a required
        argument, so without this method unpickling or deep-copying an
        instance, or an aware ``datetime`` that carries one, would fail.

        """
        # The whole offset is expressed in minutes; the instance attributes
        # are restored from the saved state afterwards anyway.
        return (self.name, 0, self.offset.total_seconds() / 60)

    def utcoffset(self, dt):
        """Return UTC offset as a datetime.timedelta instance."""
        return self.offset

    def tzname(self, dt):
        """Return time zone name."""
        return self.name

    def dst(self, dt):
        """Return adjustment for DST as a datetime.timedelta instance."""
        return TD_ZERO

    def __repr__(self):
        """Return string representation of time zone instance."""
        return "<tzinfo %s>" % self.name
def parse_isodate(datestr, ms_sep='.'):
    """Parse datestr and return a datetime or date instance.

    ``datestr`` must be an ISO-8601 formatted string giving either a full
    timestamp with date and time components or only a full date component. The
    fractional seconds and UTC offset parts of the time component are
    optional.

    ``ms_sep`` is an additional separator accepted between the seconds and
    their fraction (e.g. ``','``); a ``'.'`` is always accepted. Fractions
    with more than six digits are truncated to microsecond precision.

    Parsing is somewhat lax in that all fields may be given with less than the
    required number of digits as long as the value is valid, except the year,
    which must have four digits.

    If ``datestr`` contains only a date component, returns a ``datetime.date``
    instance, otherwise a ``datetime.datetime`` instance. If a valid UTC offset
    is present, the ``datetime.datetime`` instance will have the ``tzinfo``
    member set to an instance of ``UTCOffset``. A single trailing ``Z`` is
    accepted in place of an offset, but the result is then left naive.

    If ``datestr`` can't be parsed, or the UTC offset is out of range, raises
    a ``ValueError``.

    """
    offset = {}

    def get_tz(m):
        # Turn the matched offset into a tzinfo instance and remove it from
        # the string, so that strptime() only sees the date/time part.
        sign = 1 if m.group('sign') == '+' else -1
        hours = int(m.group('hours'))
        minutes = int(m.group('minutes'))
        name = "%s%02i:%02i" % (m.group('sign'), hours, minutes)
        offset['tzinfo'] = UTCOffset(name, hours * sign, minutes * sign)
        return ''

    ds = TZ_RX.sub(get_tz, datestr)

    # Accept exactly one "Z" designator, and only instead of a numeric offset
    # (not "ZZZ", nor "Z" followed by an offset).
    if not offset and ds.endswith('Z'):
        ds = ds[:-1]

    try:
        try:
            dt = datetime.strptime(ds, ISO_DT_MS_FMT)
        except ValueError:
            # No "."-separated fraction of up to six digits: look for a
            # fraction after ms_sep instead of discarding whatever follows.
            if ms_sep:
                base, sep, fraction = ds.rpartition(ms_sep)
            else:
                base, sep, fraction = '', '', ds

            if sep:
                dt = datetime.strptime(base, ISO_DT_FMT)

                if not fraction.isdigit():
                    raise ValueError("Invalid fractional seconds")

                # Scale to microseconds: pad short fractions, cut long ones.
                dt = dt.replace(microsecond=int(fraction[:6].ljust(6, '0')))
            else:
                dt = datetime.strptime(ds, ISO_DT_FMT)

        return dt.replace(**offset)
    except ValueError:
        # Not a timestamp; try a plain date before giving up.
        try:
            return datetime.strptime(ds, ISO_DATE_FMT).date()
        except ValueError:
            raise ValueError(
                "'%s' is not parsable as an ISO-8601 date/time." % datestr)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
import re | |
from datetime import date, datetime | |
from nose.tools import assert_raises | |
from parsedate import parse_isodate, UTCOffset, TZ_RX | |
def test_timezone_regex():
    """Check which strings the UTC offset pattern does and does not match."""
    # Each case: (input string, whether TZ_RX must match, description).
    cases = (
        ('XXX+10:30', True, "positive offset and hours >= 10"),
        ('-02:30', True, "negative offset"),
        ('XXX+04:5', True, "minutes only one digit but valid"),
        ('XXX-6:20', True, "hours only one digit, but valid value"),
        ('XXX-1:00', True, "negative offset an one-digit hour"),
        ('sGlasAlncSs', False, "random chars"),
        ('m2k73ls7s8t', False, "random alphanums"),
        ('4849263056', False, "random digits"),
        ('XXX+21:00', False, '1st hour digit > 1'),
        ('XXX+02:75', False, '1st minute digit > 6'),
        ('XXX+06:00Z', False, 'trailing char'),
    )

    for candidate, expected, description in cases:
        matched = TZ_RX.search(candidate) is not None

        if expected:
            template = "Should pass: %s"
        else:
            template = "Should fail: %s"

        assert matched is expected, template % description
def test_parse_isodate_pass():
    """Check that every well-formed sample yields a date or datetime."""
    samples = (
        '1990-01-01',
        '1990-5-21',
        '1990-12-6',
        '1990-3-4',
        '1990-01-01',
        '1990-01-01T12:00:00',
        '1990-01-01T12:00:00.123',
        '1990-01-01T12:00:00.156456',
        '1990-01-01T12:00:00Z',
        '1990-01-01T12:00:00+01:00',
        '1990-01-01T12:00:00-01:00',
        '1990-01-01T12:00:00+10:30',
        '1990-01-01T12:00:00-11:50',
        '1990-01-01T12:00:00+1:00',
        '1990-01-01T12:00:00-2:00',
        '1990-01-01T12:00:00-3:0',
    )

    for sample in samples:
        parsed = parse_isodate(sample)
        assert isinstance(parsed, (date, datetime))
def test_parse_isodate_fail():
    """Check that every malformed sample is rejected with a ValueError."""
    rejected = (
        '14-01-01',
        '2014-09',
        '2014-20-05',
        '2014-06-31',
        '2014-07-32',
        '1990-01-01T12:00',
        '1990-01-01 12:00:00',
        '1990-01-01T12:00:00T',
        '1990-01-01T25:00:00',
        '1990-01-01T12:75:00',
        '1990-01-01T12:00:84',
        '1990-01-01T12:00:00T',
        '1990-01-01T12:00:00+02:00Z',
        '1990-01-01T12:00:00+14:01',
        '1990-01-01T12:00:00-12:01',
    )

    for sample in rejected:
        with assert_raises(ValueError):
            parse_isodate(sample)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding:utf-8 -*- | |
"""Benchmark ISO-8601 date parsing with parsedate against dateutil.parser.""" | |
import timeit | |
# Number of times each statement is executed per timing run.
loops = 10000

# Code run once before each timing run: imports both parsers and prepares
# one input string per benchmarked format.
setup = """
from datetime import date, datetime
from dateutil.parser import parse as parse_date
from pytz import timezone
from parsedate import parse_isodate
tz = timezone("Europe/Berlin")
dd = date.today().isoformat()
dt = datetime.now().isoformat()
dt_noms = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
dt_tz = datetime.now(tz=tz).isoformat()
"""


def bench(stmt, setup=setup, loops=loops, repeat=3):
    """Benchmark stmt with timeit.repeat and print statistics.

    ``stmt`` is executed ``loops`` times per timing run, after running
    ``setup`` once, and the timing run is repeated ``repeat`` times. The
    fastest run is reported as total seconds and as microseconds per loop.

    """
    # The repeat count is passed explicitly because the default of
    # timeit.repeat differs between Python versions, which would make the
    # "best of N" figure printed below wrong.
    res = min(timeit.repeat(stmt, setup, repeat=repeat, number=loops))
    print(stmt)
    print("%s loops, best of %s: %.5f s / %.5f µs per loop" %
          (loops, repeat, res, res / loops * 1000000))
    print('')
# Time every statement in turn. The trailing comment inside each statement
# is part of what bench() prints, so it doubles as the label of the result.
for statement in (
        "d = parse_date(dd) # dateutil, YYYY-mm-dd",
        "d = parse_isodate(dd) # parsedate, YYYY-mm-dd)",
        "d = parse_date(dt) # dateutil, YYYY-mm-ddTHH:MM:SS.MS",
        "d = parse_isodate(dt) # parsedate, YYYY-mm-ddTHH:MM:SS.MS",
        "d = parse_date(dt_noms) # dateutil), YYYY-mm-ddTHH:MM:SS",
        "d = parse_isodate(dt_noms) # parsedate, YYYY-mm-ddTHH:MM:SS",
        "d = parse_date(dt_tz) # dateutil, YYYY-mm-ddTHH:MM:SS.MS+HH:MM",
        "d = parse_isodate(dt_tz) # parsedate, YYYY-mm-ddTHH:MM:SS.MS+HH:MM)",
):
    bench(statement)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Benchmark results on my i5-3250M laptop with Python 2.7.8 on x86_64 Linux:
With PyPy 2.3.1: