Skip to content

Instantly share code, notes, and snippets.

@SpotlightKid
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SpotlightKid/8c7f73cb8f936dc54c04 to your computer and use it in GitHub Desktop.
Save SpotlightKid/8c7f73cb8f936dc54c04 to your computer and use it in GitHub Desktop.
Parse ISO-8601 formatted date/time strings with UTC offsets.
# -*- coding:utf-8 -*-
"""Parse ISO-8601 formatted date/time strings.
Uses only standard library modules and supports UTC offsets.
"""
__all__ = ("parse_isodate", "UTCOffset")
import re
from datetime import datetime, timedelta, tzinfo
# strptime() formats for the accepted layouts: date only, date and time,
# and date and time with fractional seconds.
ISO_DATE_FMT = '%Y-%m-%d'
ISO_DT_FMT = '%Y-%m-%dT%H:%M:%S'
ISO_DT_MS_FMT = '%Y-%m-%dT%H:%M:%S.%f'

# Matches a UTC offset such as "+01:00" or "-3:0" at the very end of a string.
# Hours and minutes may be given with a single digit. The first minutes digit
# is limited to 0-5 so that values like ":60" are rejected instead of being
# silently carried over into the hours. Hours up to 19 pass the pattern; the
# final range check is left to whoever consumes the match.
TZ_RX = re.compile(
    r'(?P<sign>[-+])(?P<hours>[01]?\d):(?P<minutes>[0-5]?\d)$')

# Shared zero-length timedelta.
TD_ZERO = timedelta(0)
class UTCOffset(tzinfo):
    """Time zone info for a fixed offset from UTC in hours and minutes.

    Does not support Daylight Saving Time (DST) adjustment (``dst()``
    always returns a ``datetime.timedelta`` instance with 0 seconds).

    """

    def __init__(self, name, hours=0, minutes=0):
        """Set time zone name and offset.

        ``name`` is the string reported by ``tzname()``. ``hours`` and
        ``minutes`` are summed to form the offset from UTC; pass both as
        negative numbers for offsets west of UTC.

        Raises a ``ValueError`` if the resulting offset lies outside the
        range -12:00 .. +14:00.

        """
        self.name = name
        self.offset = timedelta(hours=hours, minutes=minutes)
        if not -12 <= self.offset.total_seconds() / 3600 <= 14:
            raise ValueError("Time offset not in range -12:00 .. +14:00")

    def __getinitargs__(self):
        """Return the constructor arguments used when copying or pickling.

        ``tzinfo.__reduce__`` re-creates the instance by calling the class
        with these arguments and then restores the instance ``__dict__``.
        Without this hook it would call the class with no arguments, which
        fails because ``name`` is required. Only the name is passed here; the
        actual offset comes back with the restored ``__dict__``.

        """
        return (self.name,)

    def utcoffset(self, dt):
        """Return UTC offset as a datetime.timedelta instance."""
        return self.offset

    def tzname(self, dt):
        """Return time zone name."""
        return self.name

    def dst(self, dt):
        """Return adjustment for DST as a datetime.timedelta instance."""
        return TD_ZERO

    def __repr__(self):
        """Return string representation of time zone instance."""
        return "<tzinfo %s>" % self.name
def parse_isodate(datestr, ms_sep='.'):
    """Parse datestr and return a datetime or date instance.

    ``datestr`` must be an ISO-8601 formatted string giving either a full
    timestamp with date and time components or only a full date component. The
    fractional seconds and UTC offset parts of the time component are optional.

    ``ms_sep`` is the string separating the seconds from the fractional
    seconds. The fraction must consist of digits only; digits beyond
    microsecond precision are ignored.

    Parsing is somewhat lax in that all fields may be given with less than the
    required number of digits as long as the value is valid, except the year,
    which must have four digits.

    If ``datestr`` contains only a date component, returns a ``datetime.date``
    instance, otherwise a ``datetime.datetime`` instance. If a valid UTC offset
    is present, the ``datetime.datetime`` instance will have the ``tzinfo``
    member set to an instance of ``UTCOffset``. A single trailing ``Z`` is
    accepted in place of an offset, but the result is then left naive.

    If ``datestr`` can't be parsed, or the UTC offset is out of range, raises a
    ``ValueError``.

    """
    offset = {}

    def get_tz(m):
        """Remember the matched UTC offset and remove it from the string."""
        sign = 1 if m.group('sign') == '+' else -1
        hours = int(m.group('hours'))
        minutes = int(m.group('minutes'))
        name = "%s%02i:%02i" % (m.group('sign'), hours, minutes)
        offset['tzinfo'] = UTCOffset(name, hours * sign, minutes * sign)
        return ''

    ds = TZ_RX.sub(get_tz, datestr)

    # Accept exactly one 'Z' designator, and only instead of a numeric offset,
    # so that input like "...ZZ" or "...Z+01:00" is rejected.
    if not offset and ds.endswith('Z'):
        ds = ds[:-1]

    try:
        if ms_sep and ms_sep in ds:
            head, _, fraction = ds.rpartition(ms_sep)
            # Refuse anything but digits here; otherwise arbitrary trailing
            # text after the separator would be silently discarded.
            if not fraction.isdigit():
                raise ValueError("invalid fractional seconds")
            # Same scaling as strptime's %f: right-pad to six digits.
            micros = int(fraction[:6].ljust(6, '0'))
            dt = datetime.strptime(head, ISO_DT_FMT)
            dt = dt.replace(microsecond=micros)
        else:
            dt = datetime.strptime(ds, ISO_DT_FMT)
        return dt.replace(**offset)
    except ValueError:
        # Not a full timestamp; fall through and try a date-only string.
        pass

    try:
        return datetime.strptime(ds, ISO_DATE_FMT).date()
    except ValueError:
        raise ValueError(
            "'%s' is not parsable as an ISO-8601 date/time." % datestr)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from datetime import date, datetime
from nose.tools import assert_raises
from parsedate import parse_isodate, UTCOffset, TZ_RX
def test_timezone_regex():
    """Check which strings TZ_RX recognises as ending in a UTC offset.

    Each case is ``(input string, whether a match is expected, description)``.

    """
    cases = [
        ('XXX+10:30', True, "positive offset and hours >= 10"),
        ('-02:30', True, "negative offset"),
        ('XXX+04:5', True, "minutes only one digit but valid"),
        ('XXX-6:20', True, "hours only one digit, but valid value"),
        ('XXX-1:00', True, "negative offset an one-digit hour"),
        ('sGlasAlncSs', False, "random chars"),
        ('m2k73ls7s8t', False, "random alphanums"),
        ('4849263056', False, "random digits"),
        ('XXX+21:00', False, '1st hour digit > 1'),
        ('XXX+02:75', False, '1st minute digit > 6'),
        ('XXX+06:00Z', False, 'trailing char'),
    ]

    for text, expected, description in cases:
        found = TZ_RX.search(text) is not None
        if expected:
            failure_msg = "Should pass: %s" % description
        else:
            failure_msg = "Should fail: %s" % description
        assert found is expected, failure_msg
def test_parse_isodate_pass():
    """Check that well-formed inputs yield a date or datetime instance."""
    valid_inputs = [
        # date only, including fields with fewer digits than required
        '1990-01-01',
        '1990-5-21',
        '1990-12-6',
        '1990-3-4',
        '1990-01-01',
        # date and time, with and without fractional seconds
        '1990-01-01T12:00:00',
        '1990-01-01T12:00:00.123',
        '1990-01-01T12:00:00.156456',
        # 'Z' designator and numeric UTC offsets
        '1990-01-01T12:00:00Z',
        '1990-01-01T12:00:00+01:00',
        '1990-01-01T12:00:00-01:00',
        '1990-01-01T12:00:00+10:30',
        '1990-01-01T12:00:00-11:50',
        '1990-01-01T12:00:00+1:00',
        '1990-01-01T12:00:00-2:00',
        '1990-01-01T12:00:00-3:0',
    ]

    for text in valid_inputs:
        parsed = parse_isodate(text)
        assert isinstance(parsed, (date, datetime))
def test_parse_isodate_fail():
    """Check that malformed or out-of-range inputs raise ValueError."""
    invalid_inputs = [
        # malformed or impossible dates
        '14-01-01',
        '2014-09',
        '2014-20-05',
        '2014-06-31',
        '2014-07-32',
        # malformed or impossible times
        '1990-01-01T12:00',
        '1990-01-01 12:00:00',
        '1990-01-01T12:00:00T',
        '1990-01-01T25:00:00',
        '1990-01-01T12:75:00',
        '1990-01-01T12:00:84',
        '1990-01-01T12:00:00T',
        # bad or out-of-range UTC offsets
        '1990-01-01T12:00:00+02:00Z',
        '1990-01-01T12:00:00+14:01',
        '1990-01-01T12:00:00-12:01',
    ]

    for text in invalid_inputs:
        with assert_raises(ValueError):
            parse_isodate(text)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Benchmark ISO-8601 date parsing with parsedate against dateutil.parser."""
import timeit
# Number of executions per timing run.
loops = 10000

# Setup code handed to timeit: imports both parsers and prepares one sample
# string per benchmarked input shape (date only, timestamp with and without
# fractional seconds, timestamp with UTC offset).
setup = "\n".join((
    "",
    "from datetime import date, datetime",
    "from dateutil.parser import parse as parse_date",
    "from pytz import timezone",
    "from parsedate import parse_isodate",
    'tz = timezone("Europe/Berlin")',
    "dd = date.today().isoformat()",
    "dt = datetime.now().isoformat()",
    "dt_noms = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')",
    "dt_tz = datetime.now(tz=tz).isoformat()",
    "",
))
def bench(stmt, setup=setup, loops=loops, repeat=3):
    """Benchmark stmt with timeit.repeat and print statistics.

    ``stmt`` is executed ``loops`` times per timing run after ``setup`` has
    been run once; ``repeat`` such runs are made and the fastest one is
    reported, both as a total and per loop.

    ``repeat`` is passed to ``timeit.repeat`` explicitly, because its default
    differs between Python versions and the printed "best of N" must match the
    number of runs actually made.

    """
    res = min(timeit.repeat(stmt, setup, repeat=repeat, number=loops))
    print(stmt)
    print("%s loops, best of %s: %.5f s / %.5f µs per loop" %
          (loops, repeat, res, res / loops * 1000000))
    print('')
# Statements to time, alternating dateutil and parsedate for each input shape.
# The trailing comment in each statement doubles as the label that is printed
# together with the timing result.
for statement in (
    "d = parse_date(dd) # dateutil, YYYY-mm-dd",
    "d = parse_isodate(dd) # parsedate, YYYY-mm-dd)",
    "d = parse_date(dt) # dateutil, YYYY-mm-ddTHH:MM:SS.MS",
    "d = parse_isodate(dt) # parsedate, YYYY-mm-ddTHH:MM:SS.MS",
    "d = parse_date(dt_noms) # dateutil), YYYY-mm-ddTHH:MM:SS",
    "d = parse_isodate(dt_noms) # parsedate, YYYY-mm-ddTHH:MM:SS",
    "d = parse_date(dt_tz) # dateutil, YYYY-mm-ddTHH:MM:SS.MS+HH:MM",
    "d = parse_isodate(dt_tz) # parsedate, YYYY-mm-ddTHH:MM:SS.MS+HH:MM)",
):
    bench(statement)
@SpotlightKid
Copy link
Author

Benchmark results on my i5-3250M laptop with Python 2.7.8 on x86_64 Linux:

$ python timeit_parsedate.py 
d = parse_date(dd) # dateutil, YYYY-mm-dd
10000 loops, best of 3: 0.62021 s / 62.02090 µs per loop

d = parse_isodate(dd) # parsedate, YYYY-mm-dd)
10000 loops, best of 3: 0.33465 s / 33.46488 µs per loop

d = parse_date(dt) # dateutil, YYYY-mm-ddTHH:MM:SS.MS
10000 loops, best of 3: 1.04995 s / 104.99508 µs per loop

d = parse_isodate(dt) # parsedate, YYYY-mm-ddTHH:MM:SS.MS
10000 loops, best of 3: 0.21938 s / 21.93758 µs per loop

d = parse_date(dt_noms) # dateutil), YYYY-mm-ddTHH:MM:SS
10000 loops, best of 3: 0.96244 s / 96.24381 µs per loop

d = parse_isodate(dt_noms) # parsedate, YYYY-mm-ddTHH:MM:SS
10000 loops, best of 3: 0.30808 s / 30.80752 µs per loop

d = parse_date(dt_tz) # dateutil, YYYY-mm-ddTHH:MM:SS.MS+HH:MM
10000 loops, best of 3: 1.33355 s / 133.35459 µs per loop

d = parse_isodate(dt_tz) # parsedate, YYYY-mm-ddTHH:MM:SS.MS+HH:MM)
10000 loops, best of 3: 0.34542 s / 34.54220 µs per loop

With PyPy 2.3.1:

$ pypy timeit_parsedate.py 
d = parse_date(dd) # dateutil, YYYY-mm-dd
10000 loops, best of 3: 0.13532 s / 13.53230 µs per loop

d = parse_isodate(dd) # parsedate, YYYY-mm-dd)
10000 loops, best of 3: 0.07435 s / 7.43470 µs per loop

d = parse_date(dt) # dateutil, YYYY-mm-ddTHH:MM:SS.MS
10000 loops, best of 3: 0.21100 s / 21.09959 µs per loop

d = parse_isodate(dt) # parsedate, YYYY-mm-ddTHH:MM:SS.MS
10000 loops, best of 3: 0.06989 s / 6.98931 µs per loop

d = parse_date(dt_noms) # dateutil), YYYY-mm-ddTHH:MM:SS
10000 loops, best of 3: 0.19793 s / 19.79289 µs per loop

d = parse_isodate(dt_noms) # parsedate, YYYY-mm-ddTHH:MM:SS
10000 loops, best of 3: 0.07839 s / 7.83889 µs per loop

d = parse_date(dt_tz) # dateutil, YYYY-mm-ddTHH:MM:SS.MS+HH:MM
10000 loops, best of 3: 0.24413 s / 24.41320 µs per loop

d = parse_isodate(dt_tz) # parsedate, YYYY-mm-ddTHH:MM:SS.MS+HH:MM)
10000 loops, best of 3: 0.09467 s / 9.46679 µs per loop

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment