Skip to content

Instantly share code, notes, and snippets.

@gyuque
Created April 26, 2009 14:55
Show Gist options
  • Save gyuque/102055 to your computer and use it in GitHub Desktop.
Save gyuque/102055 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import re
class ParseTwneruText(object):
    """Extract a sleep/wake event from a short Japanese status text.

    The text is scanned for three independent pieces of information:

    * a keyword telling whether the message is about going to sleep or
      waking up (required),
    * a clock time written as ``H:MM`` or ``H時MM`` (optional),
    * a day of the month written as ``N日`` (optional).

    Digits and the colon may be written with either ASCII or full-width
    characters (``24:00`` and ``２４：００`` are equivalent).
    """

    # Keywords meaning "went to sleep". These are tested before ``wakes``,
    # so a text containing both kinds of keyword is reported as 'sleep'.
    sleeps = u'寝る 就寝 ねる 寝た ねた'.split()
    # Keywords meaning "woke up".
    wakes = u'起きた 起床 おきた'.split()
    # Hour and minute (1-2 digits each) separated by ':', '：' or '時'.
    # Full-width digits are accepted because int() converts them natively.
    time = re.compile(u'([0-9０-９]{1,2})[:：時]([0-9０-９]{1,2})')
    # Day of month directly followed by '日'; the first digit is non-zero.
    date = re.compile(u'([1-9１-９][0-9０-９]?)日')

    @classmethod
    def parse(cls, text):
        """Parse *text* and return a ``(date, time, mode)`` tuple.

        Args:
            text: Unicode string to analyse.

        Returns:
            A 3-tuple ``(date, time, mode)`` where

            * ``date`` is the day of the month as an ``int``, or ``None``
              when no ``N日`` expression is present,
            * ``time`` is an ``(hour, minute)`` tuple of ``int``, or ``None``
              when no time expression is present,
            * ``mode`` is ``'sleep'`` or ``'wake'``.

            Only the first time and the first date found are used, and the
            numbers are not range-checked (``24:00`` is returned as
            ``(24, 0)``).

        Raises:
            ValueError: if *text* contains neither a sleep keyword nor a
                wake keyword.
        """
        # Sleep keywords take precedence over wake keywords.
        if any(word in text for word in cls.sleeps):
            mode = 'sleep'
        elif any(word in text for word in cls.wakes):
            mode = 'wake'
        else:
            raise ValueError('parse error')

        clock = None
        time_match = cls.time.search(text)
        if time_match:
            clock = tuple(int(part) for part in time_match.group(1, 2))

        day = None
        date_match = cls.date.search(text)
        if date_match:
            day = int(date_match.group(1))

        return day, clock, mode
# Example usage (Python 2 print statements):
# parse = ParseTwneruText.parse
# print parse(u'13日の寝た時刻を24:00に')
# print parse(u'寝た時刻を24時00に')
# print parse(u'寝た')
# Expected output, one line per call above:
# (13, (24, 0), 'sleep')
# (None, (24, 0), 'sleep')
# (None, None, 'sleep')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment