Skip to content

Instantly share code, notes, and snippets.

@altnight
Last active August 29, 2015 14:23
Show Gist options
  • Save altnight/545658da496c1cbb94c1 to your computer and use it in GitHub Desktop.
Save altnight/545658da496c1cbb94c1 to your computer and use it in GitHub Desktop.
"""
- https://speakerdeck.com/anatoo/phpdexue-buvmxing-zheng-gui-biao-xian-enzinfalseshi-zu-mi
- http://blog.asial.co.jp/1221
"""
import copy
class RegexVMThread(object):
def __init__(self):
self.string_pointer = 0
self.program_counter = 0
class RegexVMInstruction(object):
CHAR = 0
MATCH = 1
JUMP = 2
SPLIT = 3
def __init__(self, instruction_type, param=None):
self.type_ = instruction_type
self.param = param
class RegexVM(object):
@classmethod
def run(cls, string, instructions):
threads = []
thread = RegexVMThread()
while True:
# 現在の命令を取り出す
instruction = instructions[thread.program_counter]
if not instruction:
raise Exception()
while True:
if instruction.type_ == RegexVMInstruction.CHAR:
# patch
try:
char = string[thread.string_pointer]
except IndexError:
char = 0
# マッチする場合
if char == instruction.param:
thread.string_pointer += 1
thread.program_counter += 1
else:
if threads:
# スレッドがまだある場合には、
# 現在のスレッドを捨ててそのスレッドに切り替える
thread = threads.pop()
else:
# スレッドがもうない場合には、失敗する
return False
break
elif instruction.type_ == RegexVMInstruction.SPLIT:
# スレッドを分割する
new_thread = copy.deepcopy(thread)
new_thread.program_counter = instruction.param
threads.insert(0, new_thread)
thread.program_counter += 1
break
elif instruction.type_ == RegexVMInstruction.MATCH:
# マッチする
return True
elif instruction.type_ == RegexVMInstruction.JUMP:
# 特定の命令に飛ぶ
thread.program_counter = instruction.param
break
vm = RegexVM()
instructions = [
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'),
RegexVMInstruction(RegexVMInstruction.MATCH)
]
print(vm.run('a', instructions)) # True
print(vm.run('b', instructions)) # False
print('-' * 40)
instructions = [
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'),
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'),
RegexVMInstruction(RegexVMInstruction.CHAR, 'c'),
RegexVMInstruction(RegexVMInstruction.MATCH)
]
print(RegexVM.run('abc', instructions)); # True
print(RegexVM.run('bc', instructions)); # False
print('-' * 40)
# /(ab)|(cd)/という正規表現を表現する命令列
instructions = [
RegexVMInstruction(RegexVMInstruction.SPLIT, 4),
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'),
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'),
RegexVMInstruction(RegexVMInstruction.JUMP, 6),
RegexVMInstruction(RegexVMInstruction.CHAR, 'c'),
RegexVMInstruction(RegexVMInstruction.CHAR, 'd'),
RegexVMInstruction(RegexVMInstruction.MATCH)
]
print(RegexVM.run('ab', instructions)); # True
print(RegexVM.run('cd', instructions)); # True
print(RegexVM.run('bc', instructions)); # False
print('-' * 40)
# /a?/という正規表現を表現する命令列
instructions = [
RegexVMInstruction(RegexVMInstruction.SPLIT, 2),
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'),
RegexVMInstruction(RegexVMInstruction.MATCH)
]
print(RegexVM.run('', instructions)); # => True
print(RegexVM.run('a', instructions)); # => True
print('-' * 40)
# /(ab)*/という正規表現を表現する命令列
instructions = [
RegexVMInstruction(RegexVMInstruction.SPLIT, 4),
RegexVMInstruction(RegexVMInstruction.CHAR, 'a'),
RegexVMInstruction(RegexVMInstruction.CHAR, 'b'),
RegexVMInstruction(RegexVMInstruction.JUMP, 0),
RegexVMInstruction(RegexVMInstruction.MATCH)
]
print(RegexVM.run('ab', instructions)); # => True
print(RegexVM.run('abab', instructions)); # => True
print(RegexVM.run('ababab', instructions)); # => True
print(RegexVM.run('', instructions)); # => True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment