|
# -*- coding: utf-8 -*- |
|
import ast |
|
from pygments.lexer import RegexLexer, bygroups, using |
|
from pygments.token import Error, Keyword, Name, Number, Operator, String, Whitespace |
|
|
|
|
|
__all__ = ["ArgumentsLexer", "ValueLexer"] |
|
|
|
|
|
class ValueLexer(RegexLexer):
    """ A small lexer to analyze string, number, boolean and variable name.

    Every rule is anchored with ^...$, so a rule must consume the whole
    input to match; anything non-blank that fails the specific rules
    falls through to the generic String rule at the end. """

    tokens = {
        'root': [
            # Quoted string: the closing quote must match the opening one.
            (r"^(?P<delimiter>['\"])(.*?)(?P=delimiter)$", String),
            # Booleans: lowercase or capitalized form only.
            (r'^([Ff]alse|[Tt]rue)$', Keyword),
            # Float, with an optional exponent part.
            (r'^(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?$', Number.Float),
            (r'^0o[0-7]+$', Number.Oct),
            (r'^0x[a-fA-F0-9]+$', Number.Hex),
            (r'^0b[01]+$', Number.Bin),
            (r'^\d+$', Number.Integer),
            # Fallback: any other non-whitespace token is a bare string.
            (r'^[^\s]+$', String),
        ],
    }

    def parse(self, text):
        """ Parse input text and convert it to its base type.

        :param text: raw value token to classify and convert
        :return: bool, int, float or str depending on the matched rule,
                 or None when the lexer emitted any Error token """
        if any(token is Error for token, _ in self.get_tokens(text)):
            return None
        # Only the first token matters; get_tokens() is a generator, so
        # take its first item directly instead of materializing a list.
        token, value = next(self.get_tokens(text))
        if token is Keyword:
            # The Keyword rule only matches false/False/true/True.
            value = value in ("true", "True")
        elif token is String:
            # Drop surrounding quotes; bare strings are unaffected unless
            # they happen to start or end with quote characters.
            value = value.strip("'\"")
        elif token is Number.Integer:
            value = int(value)
        elif token is Number.Float:
            value = float(value)
        elif token in (Number.Oct, Number.Hex, Number.Bin):
            # Safely evaluate 0o.../0x.../0b... literals without eval().
            value = ast.literal_eval(value)
        return value
|
|
|
|
|
class ArgumentsLexer(RegexLexer):
    """ A lexer to analyze command arguments with the following structure:
         arg1 arg2 ... argN kwarg1=val1 kwarg2=val2 ... kwargM=valM

    Positional arguments must come first; once a keyword argument is
    matched the lexer pushes the 'kwargs' state, which only accepts
    further keyword arguments. Keyword names start with a letter or one
    or two underscores, continue with letters, digits, '-' or '_', and
    do not end with '-' or '_'. """

    tokens = {
        'root': [
            # name=<quoted value> ; switch to kwargs-only mode.
            (r'((?:[a-zA-Z]|\_{1,2})(?:[a-zA-Z0-9-_]*[a-zA-Z0-9])?)(=)((?P<delimiter>[\'"]).*?(?P=delimiter))',
             bygroups(Name, Operator, using(ValueLexer)), ('kwargs', '#push')),
            # name=<bare value> ; switch to kwargs-only mode.
            (r'((?:[a-zA-Z]|\_{1,2})(?:[a-zA-Z0-9-_]*[a-zA-Z0-9])?)(=)([^\s]+)',
             bygroups(Name, Operator, using(ValueLexer)), ('kwargs', '#push')),
            # Positional quoted argument.
            (r'(?P<delimiter>[\'"])(.*?)(?P=delimiter)', using(ValueLexer), '#push'),
            # Positional bare argument.
            (r'[^\s]+', using(ValueLexer), '#push'),
            (r'\s+', Whitespace, '#pop'),
        ],
        'kwargs': [
            # Only keyword arguments are valid from here on.
            (r'((?:[a-zA-Z]|\_{1,2})(?:[a-zA-Z0-9-_]*[a-zA-Z0-9])?)(=)((?P<delimiter>[\'"]).*?(?P=delimiter))',
             bygroups(Name, Operator, using(ValueLexer)), '#push'),
            (r'((?:[a-zA-Z]|\_{1,2})(?:[a-zA-Z0-9-_]*[a-zA-Z0-9])?)(=)([^\s]+)',
             bygroups(Name, Operator, using(ValueLexer)), '#push'),
            (r'\s+', Whitespace, '#pop'),
        ],
    }

    def parse(self, text):
        """ Parse the input text and return a tuple of arguments and a
        dictionary of keyword-arguments.

        :param text: raw argument string to parse
        :return: (args, kwargs) with values converted by ValueLexer,
                 or (None, None) when the lexer emitted any Error token """
        if any(token is Error for token, _ in self.get_tokens(text)):
            return None, None
        tokens, args, kwargs = self.get_tokens(text), [], {}
        vl = ValueLexer()
        for token, value in tokens:
            if token is Whitespace:
                continue
            if token is Name:
                # A Name is always followed by the Operator '=' and then
                # the value token; consume both from the same iterator.
                next(tokens)
                _, v = next(tokens)
                kwargs[value] = vl.parse(v)
            else:
                args.append(vl.parse(value))
        return tuple(args), kwargs