Created
August 11, 2011 17:29
-
-
Save shawnchin/1140238 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import itertools | |
from dateutil import parser | |
# Filler/separator tokens taken from dateutil's parserinfo.JUMP; parse_multiple
# skips over them without letting them start or end a date fragment.
jumpwords = set(parser.parserinfo.JUMP)

# Lower-cased vocabulary that marks a token as part of a date: UTC zone names
# and "pertain" words (flat lists), plus every spelling in the weekday, month,
# hour/minute/second and am/pm tables (each entry is a group of spellings).
keywords = set(
    word.lower()
    for word in itertools.chain(
        parser.parserinfo.UTCZONE,
        parser.parserinfo.PERTAIN,
        itertools.chain.from_iterable(parser.parserinfo.WEEKDAYS),
        itertools.chain.from_iterable(parser.parserinfo.MONTHS),
        itertools.chain.from_iterable(parser.parserinfo.HMS),
        itertools.chain.from_iterable(parser.parserinfo.AMPM),
    )
)
def parse_multiple(s):
    """Extract and parse every date-like fragment found in free text.

    The text is tokenised with dateutil's lexer (``parser._timelex``, a
    private API).  A fragment begins at the first token that is a finite
    number or one of the module-level ``keywords``, may contain any of the
    module-level ``jumpwords`` in between, and ends just before the first
    token that is neither.  Each fragment is handed to ``parser.parse``.

    Args:
        s: The text to scan.

    Returns:
        A list holding the ``parser.parse`` result for each fragment, in
        order of appearance.  The list is empty when no fragment is found.

    Raises:
        Any exception ``parser.parse`` raises for a fragment it cannot
        interpret is propagated unchanged.
    """
    import math

    def is_valid_kw(token):
        """Return True if *token* is a finite number or a date keyword."""
        try:
            value = float(token)
        except ValueError:
            value = None
        # float() also accepts the words "nan", "inf" and "infinity"; those
        # are ordinary words here, not numeric date components, so only
        # finite values count as numbers.
        if value is not None and not (math.isnan(value) or math.isinf(value)):
            return True
        return token.lower() in keywords

    def _split(text):
        """Yield each run of tokens that looks like a date, as one string."""
        tokens = parser._timelex.split(text)
        start = None  # index of the first token of the fragment in progress
        for i, token in enumerate(tokens):
            # Jump words never open or close a fragment.  Compare
            # case-insensitively, matching how keywords are compared, so
            # that "Of"/"And" inside a date do not split it.
            if token.lower() in jumpwords:
                continue
            if is_valid_kw(token):
                if start is None:
                    start = i
            elif start is not None:
                # First non-date token after a fragment: emit everything up
                # to (not including) it; trailing jump words are kept.
                yield "".join(tokens[start:i])
                start = None
        # Handle a fragment that runs to the end of the input.
        if start is not None:
            yield "".join(tokens[start:])

    return [parser.parse(fragment) for fragment in _split(s)]
# Demo: print the dates found in a sample sentence.  The parenthesised
# single-argument form behaves the same on Python 2 and Python 3.
print(parse_multiple("I like peas on 2011-04-23, and I also like them on easter and my birthday, the 29th of July, 1928"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment