Skip to content

Instantly share code, notes, and snippets.

@Yaulendil
Created August 31, 2020 20:23
Show Gist options
  • Save Yaulendil/fb124366f734d24dfbe2e664e6312d80 to your computer and use it in GitHub Desktop.
Save Yaulendil/fb124366f734d24dfbe2e664e6312d80 to your computer and use it in GitHub Desktop.
RxFmt - A streamlined factory function for reformatting Python Strings, using Regular Expressions and groups.
"""RxFmt: Implements a factory function to convert a Regular Expression Pattern,
a String, and a Dict into a new function. The new function takes a String,
breaks it into the groups in the Pattern, and formats the groups into a new
String.
Example:
>>> hl = reformatter(
... r"(?P<INDEX>\d+)\W*(?P<WORD>.*)", # RegEx Pattern defining named groups.
... "{WORD} : {INDEX:0>3}", # Python String taking the groups as fields.
... {"WORD": str.upper} # (Optional) Dict mapping group names to functions.
... )
>>> hl("1. asdf")
'ASDF : 001'
>>> hl("1234!!!qwert")
'QWERT : 1234'
"""
from re import compile
from typing import (
Callable,
Dict,
Match,
Optional,
Pattern,
Sequence,
TypeVar,
Union,
)
__all__ = "Formatter", "reformatter"

# This TypeVar is named AnyStr to communicate that its value is effectively
# equivalent to the AnyStr Union, but it does also need to serve as a TypeVar.
# This name shadowing should be fine because it is not exported in __all__.
AnyStr = TypeVar("AnyStr", bytes, str)

# Shape of the Functions produced by `reformatter`: they take the input String
# and the `allow_nones` flag, and return the reformatted String or `None`.
Formatter = Callable[[AnyStr, bool], Optional[AnyStr]]


def reformatter(
    pattern: Union[AnyStr, Pattern[AnyStr]],
    fstring: Union[AnyStr, Sequence[AnyStr]],
    groups: Optional[Dict[str, Callable[[AnyStr], AnyStr]]] = None,
) -> Formatter:
    """Reformatter: Create a Function which will format Strings.

    :param str pattern: A Regular Expression Pattern, either as a String or
        already compiled. Its named Groups supply the fields for formatting.
    :param str fstring: A String, or Sequence of Strings, with {fields} to be
        formatted with Groups. When several are given they are tried in
        order, and the first one that formats successfully is used.
    :param Optional[dict] groups: An optional Mapping of Group names to
        Functions that should be run on those Groups. A Function is only
        called when its Group actually took part in the Match.
    :return: A Function that accepts a String, uses a Regular Expression to
        extract Groups from it, and then formats those Groups into another
        String. It returns `None` when the Pattern is not found in the input
        or when none of the Format Strings can be filled in. Its optional
        second argument, `allow_nones` (default `False`), keeps Groups that
        did not take part in the Match in the Mapping with a value of `None`
        instead of leaving them out.

    NOTE: Formatting relies on the `format` method of the Format Strings.
    `bytes` objects do not have one, so `bytes` Format Strings always fail
    and the returned Function yields `None` for them.
    """
    # If the pattern received is just a String, compile it into a RegEx Pattern.
    if not isinstance(pattern, Pattern):
        pattern = compile(pattern)

    # Normalise the Format Strings into a Tuple. A lone String is wrapped by
    # itself; any other Sequence is copied, so that later changes made by the
    # caller cannot alter the Function or contradict its generated docstring.
    if isinstance(fstring, (bytes, str)):
        fstring = (fstring,)
    else:
        fstring = tuple(fstring)

    # If no Group transformation map was provided, make a dummy.
    if groups is None:
        groups = {}

    def fmt(instr: AnyStr, allow_nones: bool = False) -> Optional[AnyStr]:
        match_init: Optional[Match] = pattern.search(instr)
        if match_init is None:
            return None

        # Create a Mapping of all the Groups found in the Match. For each Group
        # value, if a transformation Function is supplied for it, run the
        # Function before putting it into the Mapping. Groups with no value
        # are dropped unless `allow_nones` is set.
        mapping: Dict[str, Optional[AnyStr]] = {
            key: groups[key](val) if val is not None and key in groups else val
            for key, val in match_init.groupdict().items()
            if allow_nones or val is not None
        }

        # Try to Format using each String provided. If one fails, for example
        # because the String has a Field without a matching Group, try the
        # next one.
        for fstr in fstring:
            try:
                return fstr.format(**mapping)
            except Exception:
                # Deliberately broad: any ordinary formatting failure means
                # "try the next Format String". Exceptions outside of
                # `Exception` (KeyboardInterrupt, SystemExit, ...) are not
                # formatting failures and must reach the caller.
                continue

        # Every Format String failed.
        return None

    # Generate a docstring for the Function, including summaries of the Regular
    # Expression it matches against and the Format Strings it outputs with.
    fmt.__doc__ = (
        "Reformat an input String into a different structure.\n"
        "The input String will be matched against the following Regular"
        " Expression:\n"
        "{!r}\n"
        "The Groups of the resulting Match will then be formatted into the"
        " first one\n"
        "of the following Format Strings which works:\n"
        "{}\n"
        "Returns `None` if none of the Format operations are successful."
    ).format(pattern.pattern, "\n ".join(map(repr, fstring)))

    return fmt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment