Skip to content

Instantly share code, notes, and snippets.

Created August 31, 2020 20:23
Show Gist options
  • Save Yaulendil/fb124366f734d24dfbe2e664e6312d80 to your computer and use it in GitHub Desktop.
Save Yaulendil/fb124366f734d24dfbe2e664e6312d80 to your computer and use it in GitHub Desktop.
RxFmt - A streamlined factory function for reformatting Python Strings, using Regular Expressions and groups.
r"""RxFmt: Implements a factory function to convert a Regular Expression Pattern,
    a String, and a Dict into a new function. The new function takes a String,
    breaks it into the groups in the Pattern, and formats the groups into a new
    String.

>>> hl = reformatter(
...     r"(?P<INDEX>\d+)\W*(?P<WORD>.*)",  # RegEx Pattern defining named groups.
...     "{WORD} : {INDEX:0>3}",  # Python String taking the groups as fields.
...     {"WORD": str.upper}  # (Optional) Dict mapping group names to functions.
... )
>>> hl("1. asdf")
'ASDF : 001'
>>> hl("1234!!!qwert")
'QWERT : 1234'
"""

from re import compile
from typing import (
    Callable,
    Dict,
    Match,
    Optional,
    Pattern,
    Sequence,
    TypeVar,
    Union,
)


__all__ = "Formatter", "reformatter"


# This TypeVar is named AnyStr to communicate that its value is effectively
#   equivalent to the AnyStr Union, but it does also need to serve as a TypeVar.
#   This name shadowing should be fine because it is not exported in __all__.
AnyStr = TypeVar("AnyStr", bytes, str)
Formatter = Callable[[AnyStr, bool], Optional[AnyStr]]


def reformatter(
    pattern: Union[AnyStr, Pattern[AnyStr]],
    fstring: Union[AnyStr, Sequence[AnyStr]],
    groups: Optional[Dict[str, Callable[[AnyStr], AnyStr]]] = None,
) -> Formatter:
    """Reformatter: Create a Function which will format Strings.

    :param str pattern: A Regular Expression Pattern, either as a String or
        already compiled.
    :param str fstring: A String, or Sequence of Strings, with {fields} to be
        formatted with Groups. They are tried in order; the first one that
        formats successfully wins.
    :param Optional[dict] groups: An optional Mapping of Group names to
        Functions that should be run on those Groups. A Function is never
        called for a Group that did not participate in the Match.

    :return: A Function that accepts a String, uses a Regular Expression to
        extract Groups from it, and then formats those Groups into another
        String. It returns `None` when the input does not match, or when
        none of the Format Strings can be filled in.
    """
    # If the pattern received is just a String, compile it into a RegEx Pattern.
    if not isinstance(pattern, Pattern):
        pattern = compile(pattern)

    # If only one Format String was received, put it into a Tuple by itself.
    #   Otherwise snapshot the Sequence, so that it can safely be iterated on
    #   every call as well as once more for the generated docstring.
    if isinstance(fstring, (bytes, str)):
        fstring = (fstring,)
    else:
        fstring = tuple(fstring)

    # If no Group transformation map was provided, make a dummy.
    if groups is None:
        groups = {}

    def fmt(instr: AnyStr, allow_nones: bool = False) -> Optional[AnyStr]:
        # NOTE(review): the matching call was missing from the text this
        #   module was restored from. `match` (anchored at the start of the
        #   input) is assumed; confirm it should not be `search`/`fullmatch`.
        match_init: Optional[Match] = pattern.match(instr)

        if match_init is None:
            return None

        # Create a Mapping of all the Groups found in the Match. For each Group
        #   value, if a transformation Function is supplied for it, run the
        #   Function before putting it into the Mapping. Groups that did not
        #   participate (value None) are left out unless `allow_nones` is set.
        mapping: Dict[str, Optional[AnyStr]] = {
            key: groups[key](val) if val is not None and key in groups else val
            for key, val in match_init.groupdict().items()
            if allow_nones or val is not None
        }

        # Try to Format using each String provided. If one fails, for example
        #   because the String has a Field without a matching Group, try the
        #   next one.
        # NOTE(review): the caught exception types are a reconstruction:
        #   KeyError/IndexError cover a Field with no Group, TypeError covers
        #   a format spec applied to None, ValueError covers a bad spec.
        for fstr in fstring:
            try:
                return fstr.format(**mapping)
            except (IndexError, KeyError, TypeError, ValueError):
                continue

        return None

    # Generate a docstring for the Function, including summaries of the Regular
    #   Expression it matches against and the Format Strings it outputs with.
    fmt.__doc__ = """Reformat an input String into a different structure.

    The input String will be matched against the following Regular Expression:
        {!r}

    The Groups of the resulting Match will then be formatted into the first one
    of the following Format Strings which works:
        {}

    Returns `None` if none of the Format operations are successful.""".format(
        pattern.pattern, "\n        ".join(map(repr, fstring))
    )

    return fmt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment