Skip to content

Instantly share code, notes, and snippets.

@bertspaan
Created July 5, 2012 14:49
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bertspaan/3054114 to your computer and use it in GitHub Desktop.
Save bertspaan/3054114 to your computer and use it in GitHub Desktop.
Parser for Dutch natural language time strings
#!/usr/local/Cellar/python3/3.2.3/bin/python3.2
from pypeg2 import *
import datetime
# Dutch number words for 0 through 60. A word's position in the list is its
# numeric value, which is how string_to_int() resolves spelled-out numbers.
numbers = [
    # 0-9
    "nul", "een", "twee", "drie", "vier",
    "vijf", "zes", "zeven", "acht", "negen",
    # 10-19
    "tien", "elf", "twaalf", "dertien", "veertien",
    "vijftien", "zestien", "zeventien", "achttien", "negentien",
    # 20-29
    "twintig", "eenentwintig", "tweeëntwintig", "drieëntwintig",
    "vierentwintig", "vijfentwintig", "zesentwintig", "zevenentwintig",
    "achtentwintig", "negenentwintig",
    # 30-39
    "dertig", "eenendertig", "tweeëndertig", "drieëndertig",
    "vierendertig", "vijfendertig", "zesendertig", "zevenendertig",
    "achtendertig", "negenendertig",
    # 40-49
    "veertig", "eenenveertig", "tweeënveertig", "drieënveertig",
    "vierenveertig", "vijfenveertig", "zesenveertig", "zevenenveertig",
    "achtenveertig", "negenenveertig",
    # 50-59
    "vijftig", "eenenvijftig", "tweeënvijftig", "drieënvijftig",
    "vierenvijftig", "vijfenvijftig", "zesenvijftig", "zevenenvijftig",
    "achtenvijftig", "negenenvijftig",
    # 60
    "zestig",
]
# Sample Dutch time phrases; the demo loop at the bottom of the file parses
# each one in this order and prints the resulting datetime.
times = [
    # digits only
    "12 56",
    "12:56",
    # spelled-out hour followed by spelled-out minutes
    "twaalf dertig",
    "negen vijftien",
    # minutes "voor" (before) / "over" (past) an hour, with or without "half"
    "kwart voor twee",
    "tien voor twee",
    "drie over half tien",
    "13 uur 52",
    "half drie",
    "zevenenveertig over tien",
    "zeven voor half vier",
    "achttien vijftien",
    # hour only, with or without the word "uur"
    "zeven",
    "negen uur",
    "8 uur",
    "achttien uur vijftien",
]
class Number(str):
    """A single token holding a number, either digits or a Dutch number word.

    The token is matched with pyPEG2's ``word`` pattern and kept as a plain
    string; string_to_int() later turns it into an integer.
    """

    grammar = word
class Hours(object):
    """The hour part of a time phrase.

    Attributes set by the parser:
        half:  flag recording whether the keyword "half" preceded the hour.
        value: the hour token, a ``Number``.

    A trailing literal "uur" is accepted but optional.
    """

    grammar = (
        flag("half", K("half")),
        attr("value", Number),
        optional("uur"),
    )
class SignValue(Keyword):
    """Keyword restricted to "voor" or "over".

    to_time() subtracts the minutes from the hour for "voor" and adds them
    for any other value.
    """

    grammar = Enum(K("voor"), K("over"))
class Sign(object):
    """Wrapper node whose ``value`` attribute holds the parsed ``SignValue``."""

    grammar = attr("value", SignValue)
class Minutes(object):
    """The minute part of a time phrase; ``value`` holds the ``Number`` token."""

    grammar = attr("value", Number)
class Time(List):
    """A complete time phrase, parsed into a list of Hours/Minutes/Sign nodes.

    The grammar is a list of alternatives (per the pyPEG2 documentation a
    list means "one of these options", tried in the order given):

    1. minutes, then "voor"/"over", then hours
    2. hours, an optional ":", then minutes
    3. hours only

    The first alternative used to be listed twice; the redundant copy has
    been removed, which does not change what is accepted.
    """

    grammar = [
        (Minutes, Sign, Hours),
        (Hours, optional(":"), Minutes),
        Hours,
    ]
def string_to_int(str):
    """Convert a number token (digits or a Dutch number word) to an integer.

    "kwart" (a quarter of an hour) maps to 15. Integer literals are converted
    with ``int()``. Any other token is looked up in the module-level
    ``numbers`` list, where a word's index is its value ("nul" = 0 up to
    "zestig" = 60).

    Args:
        str: The token to convert. The name shadows the builtin; it is kept
            unchanged so existing callers are not broken.

    Returns:
        The integer value of the token.

    Raises:
        ValueError: If the token is neither an integer literal nor one of
            the known number words.
    """
    if str == "kwart":
        return 15

    # Integer literals never collide with the number words, so trying int()
    # first gives the same results as looking the word up first.
    try:
        return int(str)
    except ValueError:
        pass

    # Search the whole table. The previous loop stopped at index 59, so the
    # last entry ("zestig" = 60) could never be matched.
    try:
        return numbers.index(str)
    except ValueError:
        raise ValueError(
            "not an integer or a known Dutch number word: %r" % (str,)
        )
def to_time(thing):
    """Convert a parsed time phrase into a ``datetime`` on today's date.

    The phrase is reduced to a number of minutes since midnight:
    hours * 60, minus 30 when the hour was prefixed with "half", plus the
    minutes (or minus the minutes when the sign is "voor").

    Args:
        thing: An iterable of ``Hours``, ``Minutes`` and ``Sign`` nodes, as
            produced by parsing with the ``Time`` grammar. Elements of any
            other type are ignored.

    Returns:
        A ``datetime.datetime`` combining today's date with the computed
        time of day. An empty iterable yields midnight.

    Raises:
        ValueError: If an hour or minute token cannot be converted by
            string_to_int().
    """
    minutes_str = ""
    hours_str = ""
    half = False
    sign = 1
    for element in thing:
        if isinstance(element, Hours):
            hours_str = element.value
            half = element.half
        elif isinstance(element, Minutes):
            minutes_str = element.value
        elif isinstance(element, Sign):
            # "voor" counts back from the hour; anything else counts forward.
            if element.value == "voor":
                sign = -1

    total_minutes = 0
    if hours_str:
        total_minutes = string_to_int(hours_str) * 60
        if half:
            # "half drie" means half an hour before three.
            total_minutes -= 30
    if minutes_str:
        total_minutes += sign * string_to_int(minutes_str)

    # Wrap onto the 24-hour clock. Without this, a total below 0 or at/above
    # 24:00 (e.g. "kwart voor nul", "24 uur") gives an hour outside 0..23 and
    # datetime.time() raises ValueError.
    total_minutes %= 24 * 60
    hours, minutes = divmod(total_minutes, 60)

    today = datetime.date.today()
    return datetime.datetime.combine(today, datetime.time(hours, minutes))
# Demo driver: parse every sample phrase with the Time grammar, convert the
# parse tree to a datetime, and print the phrase next to the result.
for time_str in times:
    f = parse(time_str, Time)
    time = to_time(f)
    print(time_str, " => ", time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment