Skip to content

Instantly share code, notes, and snippets.

@jthielen
Last active September 4, 2019 18:46
Show Gist options
  • Save jthielen/57026fee4dc6599a3fb646000b8ca748 to your computer and use it in GitHub Desktop.
Save jthielen/57026fee4dc6599a3fb646000b8ca748 to your computer and use it in GitHub Desktop.
Idea for an extensible unit string preprocessor for pint
class StringPreprocessor(object):
    """Extensible preprocessor for unit expression strings.

    Calling an instance on a string applies, in order:

    1. plain string replacements (the defaults plus any pairs added with
       ``add_replacement()``),
    2. regex substitutions (the defaults plus any added with
       ``add_regex_sub()``),
    3. conversion of "pretty" characters (superscript exponents, the
       middle dot and the superscript minus) to ``**``, ``*`` and ``-``.
    """

    # Default (old, new) pairs applied with ``str.replace``.
    _default_replacements = [
        (",", ""),
        (" per ", "/"),
        ("^", "**"),
    ]

    # Default (pattern, replacement) regex pairs. Every pattern is passed
    # through ``str.format`` in ``reset_regex_subs()``, where ``{}`` is filled
    # with the identifier pattern; default patterns therefore must not contain
    # other literal braces.
    _default_subs_re = [
        ('\N{DEGREE SIGN}', " degree"),
        # Merge multiple spaces.
        (r"([\w\.\-\+\*\\\^])\s+", r"\1 "),
        # Handle square and cube.
        (r"({}) squared", r"\1**2"),
        (r"({}) cubed", r"\1**3"),
        (r"cubic ({})", r"\1**3"),
        (r"square ({})", r"\1**2"),
        (r"sq ({})", r"\1**2"),
        # Handle numberLetter for multiplication: insert ``*`` after a number
        # directly followed by a letter. An ``e``/``E`` only counts when it is
        # itself followed by a letter, so ``1e5`` is left alone.
        # NOTE(review): the ``|`` inside ``[e|E]`` is a literal character of
        # the class, not an alternation; kept as in the original.
        (r"\b([0-9]+\.?[0-9]*)(?=[e|E][a-zA-Z]|[a-df-zA-DF-Z])", r"\1*"),
        # Handle space for multiplication.
        (r"([\w\.\-])\s+(?=\w)", r"\1*"),
    ]

    # Pretty-format translation table and exponent regex.
    # ``str.maketrans`` is the builtin, so no external helper is required.
    _pretty_table = str.maketrans('⁰¹²³⁴⁵⁶⁷⁸⁹·⁻', '0123456789*-')
    _pretty_exp_re = re.compile(r"⁻?[⁰¹²³⁴⁵⁶⁷⁸⁹]+(?:\.[⁰¹²³⁴⁵⁶⁷⁸⁹]*)?")

    def __init__(self):
        """Initialise the regex and replacement lists from the defaults."""
        self.reset_regex_subs()
        self.reset_replacements()

    def __call__(self, input_string):
        """Return ``input_string`` after applying all preprocessing steps.

        Processing occurs in the following order:

        1) String replacements as defined by tuples in the replacements
        2) Regex substitutions (both default and those added by
           `add_regex_sub()`)
        3) Pretty text format character handling
        """
        # String replacements
        for current, replacement in self._replacements:
            input_string = input_string.replace(current, replacement)

        # Regex substitutions
        for pattern, replacement in self._compiled_subs_re:
            input_string = pattern.sub(replacement, input_string)

        # Pretty exponents are rewritten in a single regex pass so that each
        # match is replaced exactly where it was found. A global
        # ``str.replace`` per match would corrupt longer exponents that
        # contain a shorter one (e.g. the "²" inside "⁻²").
        input_string = self._pretty_exp_re.sub(
            self._expand_pretty_exponent, input_string
        )

        # Translate any remaining pretty characters (e.g. the middle dot).
        return input_string.translate(self._pretty_table)

    def _expand_pretty_exponent(self, match):
        """Return the ASCII ``**<exponent>`` form of a pretty exponent match."""
        return '**' + match.group(0).translate(self._pretty_table)

    def add_replacement(self, current, replacement):
        """Add given replacement pair to this instance's replacement list."""
        self._replacements.append((current, replacement))

    def reset_replacements(self):
        """Reset replacement list to default."""
        # Copy so that ``add_replacement()`` never mutates the class-level
        # defaults shared by every instance.
        self._replacements = list(self._default_replacements)

    def add_regex_sub(self, regex_string, replacement):
        """Compile the given regex_string and append it to the regex sub list.

        Unlike the default patterns, ``regex_string`` is compiled as given
        (no ``{}`` placeholder expansion).
        """
        self._compiled_subs_re.append((re.compile(regex_string), replacement))

    def reset_regex_subs(self):
        """Reset regex substitution list to default."""
        identifier = r"[_a-zA-Z][_a-zA-Z0-9]*"
        self._compiled_subs_re = [
            (re.compile(pattern.format(identifier)), replacement)
            for pattern, replacement in self._default_subs_re
        ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment