Created
August 31, 2020 20:23
-
-
Save Yaulendil/fb124366f734d24dfbe2e664e6312d80 to your computer and use it in GitHub Desktop.
RxFmt - A streamlined factory function for reformatting Python Strings, using Regular Expressions and groups.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""RxFmt: Implements a factory function to convert a Regular Expression Pattern, | |
a String, and a Dict into a new function. The new function takes a String,
breaks it into the groups in the Pattern, and formats the groups into a new
String.
Example:
>>> hl = reformatter(
... r"(?P<INDEX>\d+)\W*(?P<WORD>.*)", # RegEx Pattern defining named groups.
... "{WORD} : {INDEX:0>3}", # Python String taking the groups as fields.
... {"WORD": str.upper} # (Optional) Dict mapping group names to functions.
... )
>>> hl("1. asdf") # 'ASDF : 001'
>>> hl("1234!!!qwert") # 'QWERT : 1234'
""" | |
from re import compile | |
from typing import ( | |
Callable, | |
Dict, | |
Match, | |
Optional, | |
Pattern, | |
Sequence, | |
TypeVar, | |
Union, | |
) | |
# Names exported by `from <module> import *`.
__all__ = ("Formatter", "reformatter")

# A TypeVar constrained to exactly `bytes` or `str`. It reuses the name
# `AnyStr` on purpose, so signatures read the same as with the well-known
# typing name; the reuse is harmless because the name is left out of __all__.
AnyStr = TypeVar("AnyStr", bytes, str)

# Shape of the Functions handed back by `reformatter`: called with an input
# String and a bool flag, they return a String of the same kind, or None.
Formatter = Callable[[AnyStr, bool], Optional[AnyStr]]
def reformatter(
    pattern: Union[AnyStr, Pattern[AnyStr]],
    fstring: Union[AnyStr, Sequence[AnyStr]],
    groups: Optional[Dict[str, Callable[[AnyStr], AnyStr]]] = None,
) -> Formatter:
    """Reformatter: Create a Function which will format Strings.

    :param str pattern: A Regular Expression Pattern, either as source text
        or already compiled.
    :param str fstring: A String, or Sequence of Strings, with {fields} to be
        formatted with Groups. When several are given they are tried in
        order and the first one that formats successfully wins.
    :param Optional[dict] groups: An optional Mapping of Group names to
        Functions that should be run on those Groups. A Function is only
        called for a Group that actually took part in the Match.

    :return: A Function that accepts a String, uses a Regular Expression to
        extract Groups from it, and then formats those Groups into another
        String. It returns `None` when the Pattern does not match or when
        none of the Format Strings can be filled in. Its second parameter,
        `allow_nones` (default `False`), controls Groups that did not take
        part in the Match: by default they are left out of the formatting
        fields, so a Format String that mentions one is skipped; when true
        they are supplied to the Format String as `None`.
    """
    # If the pattern received is just a String, compile it into a RegEx Pattern.
    if not isinstance(pattern, Pattern):
        pattern = compile(pattern)

    # Normalise the Format Strings into a Tuple. A lone String is wrapped by
    # itself; anything else is snapshotted, because the Strings are iterated
    # once for the generated docstring and again on every call, which would
    # leave a one-shot iterable empty before its first real use.
    if isinstance(fstring, (bytes, str)):
        fstring = (fstring,)
    else:
        fstring = tuple(fstring)

    # If no Group transformation map was provided, make a dummy.
    if groups is None:
        groups = {}

    def fmt(instr: AnyStr, allow_nones: bool = False) -> Optional[AnyStr]:
        match_init: Optional[Match] = pattern.search(instr)
        if match_init is None:
            return None

        # Create a Mapping of all the Groups found in the Match. For each Group
        # value, if a transformation Function is supplied for it, run the
        # Function before putting it into the Mapping. Groups whose value is
        # None are dropped unless the caller asked to keep them.
        mapping: Dict[str, Optional[AnyStr]] = {
            key: groups[key](val) if val is not None and key in groups else val
            for key, val in match_init.groupdict().items()
            if allow_nones or val is not None
        }

        # Try to Format using each String provided. If one fails, for example
        # because the String has a Field without a matching Group, try the
        # next one. Only ordinary Exceptions count as a failed attempt, so
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        for fstr in fstring:
            try:
                return fstr.format(**mapping)
            except Exception:
                continue

        # Every Format String failed.
        return None

    # Generate a docstring for the Function, including summaries of the Regular
    # Expression it matches against and the Format Strings it outputs with.
    fmt.__doc__ = (
        "Reformat an input String into a different structure.\n"
        "The input String will be matched against the following Regular"
        " Expression:\n"
        "{!r}\n"
        "The Groups of the resulting Match will then be formatted into the"
        " first one\n"
        "of the following Format Strings which works:\n"
        "{}\n"
        "Returns `None` if none of the Format operations are successful."
    ).format(pattern.pattern, "\n ".join(map(repr, fstring)))

    return fmt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment