# hassaku63/tokenizer.py

## tokenizer.py
from enum import Enum
from functools import wraps


def assert_simgle_char(f):
    """Decorate *f* so it can only be called with a one-character argument.

    The returned wrapper takes a single argument ``c``. When ``len(c)`` is
    exactly 1 the call is forwarded to *f* and its result returned;
    otherwise ``ValueError(c)`` is raised and *f* is never invoked.
    """
    @wraps(f)
    def guarded(c: str):
        # Happy path first: exactly one character is passed straight through.
        if len(c) == 1:
            return f(c)
        raise ValueError(c)

    return guarded


@assert_simgle_char
def is_space(c):
    """Return True if the single character *c* is whitespace.

    Space, tab, newline, carriage return, form feed and vertical tab count
    as whitespace. The decorator raises ValueError when *c* is not exactly
    one character long.
    """
    # The previous literal used '\s', which is a regex class and not a Python
    # string escape: it put a backslash and the letter 's' into the set, so
    # both were wrongly reported as whitespace. Spell the characters out.
    return c in ' \t\n\r\f\v'

@assert_simgle_char
def is_numeric(c):
    """Return True if the single character *c* is an ASCII digit (0-9).

    The decorator raises ValueError when *c* is not exactly one character
    long.
    """
    # For a one-character string a range comparison is the same test as
    # membership in the ten digit characters.
    return '0' <= c <= '9'


def tokenize(code: str) -> list[str]:
    """Split an arithmetic S-expression into a flat list of string tokens.

    Each parenthesis and each operator (``+ - * /``) becomes a
    one-character token, and every run of consecutive digits becomes one
    token. Characters accepted by ``is_space`` separate tokens and are
    dropped. Bracket balance is not checked.

    Args:
        code: Source text to tokenize; may be empty.

    Returns:
        The tokens in the order they appear in *code*.

    Raises:
        ValueError: If *code* contains a character that is not a
            parenthesis, an operator, a digit or whitespace.
    """
    tokens = []
    length = len(code)
    i = 0
    # Checking the bound up front makes empty input return [] instead of
    # failing on code[0].
    while i < length:
        c = code[i]
        if c in '()' or c in '+-*/':
            tokens.append(c)
            i += 1
        elif is_numeric(c):
            start = i
            # Test the bound before indexing so a number that ends the input
            # stops the scan instead of reading past the end.
            while i < length and is_numeric(code[i]):
                i += 1
            tokens.append(code[start:i])
        elif is_space(c):
            i += 1
        else:
            # No branch consumed the character; without this the index would
            # never advance and the loop would spin forever.
            raise ValueError(f'unexpected character {c!r} at position {i}')

    return tokens


if __name__ == '__main__':
    # Demo run: tokenize two sample expressions and print each token list.
    # The second sample carries a surplus ')'; tokenize() does not check
    # bracket balance, so it simply shows up as one more token.
    for sample in ('(+ 1 23)', '(* (+ 100 234) 987))'):
        ret = tokenize(sample)
        print(ret)
	from enum import Enum
	from functools import wraps


	def assert_simgle_char(f):
		"""Decorate *f* so it can only be called with a one-character argument.

		The returned wrapper takes a single argument ``c``. When ``len(c)`` is
		exactly 1 the call is forwarded to *f* and its result returned;
		otherwise ``ValueError(c)`` is raised and *f* is never invoked.
		"""
		@wraps(f)
		def inner(c: str):
			if len(c) != 1:
				raise ValueError(c)
			return f(c)
		return inner


	@assert_simgle_char
	def is_space(c):
		"""Return True if the single character *c* is whitespace.

		Space, tab, newline, carriage return, form feed and vertical tab count
		as whitespace. The decorator raises ValueError when *c* is not exactly
		one character long.
		"""
		# The previous literal used '\s', which is a regex class and not a
		# Python string escape: it put a backslash and the letter 's' into the
		# set, so both were wrongly reported as whitespace.
		return c in ' \t\n\r\f\v'

	@assert_simgle_char
	def is_numeric(c):
		"""Return True if the single character *c* is an ASCII digit (0-9).

		The decorator raises ValueError when *c* is not exactly one character
		long.
		"""
		return c in '0123456789'


	def tokenize(code: str) -> list[str]:
		"""Split an arithmetic S-expression into a flat list of string tokens.

		Each parenthesis and each operator (``+ - * /``) becomes a
		one-character token, and every run of consecutive digits becomes one
		token. Characters accepted by ``is_space`` separate tokens and are
		dropped. Bracket balance is not checked.

		Args:
			code: Source text to tokenize; may be empty.

		Returns:
			The tokens in the order they appear in *code*.

		Raises:
			ValueError: If *code* contains a character that is not a
				parenthesis, an operator, a digit or whitespace.
		"""
		tokens = []
		length = len(code)
		i = 0
		# Checking the bound up front makes empty input return [] instead of
		# failing on code[0].
		while i < length:
			c = code[i]
			if c in '()' or c in '+-*/':
				tokens.append(c)
				i += 1
			elif is_numeric(c):
				start = i
				# Test the bound before indexing so a number that ends the
				# input stops the scan instead of reading past the end.
				while i < length and is_numeric(code[i]):
					i += 1
				tokens.append(code[start:i])
			elif is_space(c):
				i += 1
			else:
				# No branch consumed the character; without this the index
				# would never advance and the loop would spin forever.
				raise ValueError(f'unexpected character {c!r} at position {i}')

		return tokens


	if __name__ == '__main__':
		# Demo run: tokenize two sample expressions and print each token list.
		# The second sample carries a surplus ')'; tokenize() does not check
		# bracket balance, so it simply shows up as one more token.
		ret = tokenize('(+ 1 23)')
		print(ret)

		ret = tokenize('(* (+ 100 234) 987))')
		print(ret)