Skip to content

Instantly share code, notes, and snippets.

@smuuf
Created December 7, 2022 09:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smuuf/82e5b315b85b30385c7019d8f1ea0c3e to your computer and use it in GitHub Desktop.
Save smuuf/82e5b315b85b30385c7019d8f1ea0c3e to your computer and use it in GitHub Desktop.
Re or Pyre2
import random
import logging
import re as module_re
import re2 as module_re2
# When a compile() call is sampled for pyre2, hand back the bare pyre2 pattern
# instead of the comparing wrapper.
USE_ONLY_PYRE2 = True

# compile() involves pyre2 when `random.random()` falls below this value.
USAGE_PROBABILITY = 0.5

# `re.Pattern` methods that the wrapper class re-implements itself.
_OVERRIDE_RE_METHODS = ("search", "sub", "finditer")

# Attributes that live on the wrapper object rather than on the wrapped
# `re.Pattern`.
_OUR_ATTRS = ("_re", "_re2", "_warn")

# Every name the wrapper resolves on itself; anything else is proxied to the
# wrapped `re.Pattern`.
_FINAL_ATTRS = (*_OUR_ATTRS, *_OVERRIDE_RE_METHODS)
class _ReOrPyrePattern:
    """Opt-in proxy for a native `re.Pattern` that also exercises `pyre2`.

    We want to find out whether some of our regex matching could be done with
    `pyre2` instead of native `re`. An instance behaves like the wrapped
    `re.Pattern`, except that `search`, `sub` and `finditer` additionally run
    the same operation through `pyre2` and log a warning when the two engines
    disagree (or when `pyre2` raises).

    Only the native `re` result is ever returned, and failures on the `pyre2`
    side are logged instead of propagated, so wrapping a pattern is meant to
    be purely informative.

    NOTE: the arguments are handed to both engines, so a callable passed as
    `repl` to `sub` is invoked by each of them.
    """

    _re: module_re.Pattern
    _re2: module_re2.Pattern

    def __init__(self, pattern, flags):
        """Compile `pattern` with `re` and, best effort, with `pyre2`.

        Errors from `re.compile` propagate as usual. If the `pyre2` compile
        fails, a warning is logged and the instance works as a plain proxy.
        """
        self._re = module_re.compile(pattern, flags)
        self._re2 = None
        try:
            self._re2 = module_re2.compile(pattern, flags)
        except Exception:
            # `logging.warn` is a deprecated alias; use `logging.warning`.
            logging.warning(f"[re vs pyre2] pyre2 compile failed for regex: {pattern}")

    def __getattr__(self, name):
        # Only called when regular attribute lookup has already failed.
        # Names that belong to the wrapper must never be proxied: let the
        # default lookup raise AttributeError for them. Everything else is
        # forwarded to the wrapped `re.Pattern`.
        if name in _FINAL_ATTRS:
            return object.__getattribute__(self, name)
        return getattr(self._re, name)

    def __setattr__(self, name, value):
        # Names that belong to the wrapper are stored on the wrapper itself.
        # Everything else is forwarded to the wrapped `re.Pattern`.
        if name in _FINAL_ATTRS:
            object.__setattr__(self, name, value)
            return
        setattr(self._re, name, value)

    def __bool__(self):
        return bool(self._re)

    # Python 2 spelling of `__bool__`, kept for backward compatibility.
    __nonzero__ = __bool__

    def __str__(self):
        return str(self._re)

    def __repr__(self):
        return repr(self._re)

    # The method used to be misspelled `_repr__`; keep the old name working.
    _repr__ = __repr__

    @staticmethod
    def _warn(origin: str, msg: str) -> None:
        """Log a warning tagged with the method (`origin`) that produced it."""
        logging.warning(f"[reorpyre][{origin}] {msg}")

    def _compare_matches(self, origin, re_result, re2_result):
        """Warn if one match attempt gave different results in both engines.

        Either argument may be `None`, meaning "no match". Only the captured
        groups are compared, so patterns without groups compare equal whenever
        both engines matched.
        """
        if re_result is not None and re2_result is not None:
            try:
                same_groups = re_result.groups() == re2_result.groups()
            except Exception:
                self._warn(origin, f"Pyre2 raised exception for regex: {self._re.pattern}")
                return
            if not same_groups:
                self._warn(origin, f"Groups not same for regex: {self._re.pattern}")
        elif re_result is not None or re2_result is not None:
            self._warn(origin, f"Matched only one: {re_result} vs {re2_result} for regex: {self._re.pattern}")

    def _compare_next(self, re_result, re2_result_iter):
        """Compare `re_result` with the next match from `re2_result_iter`.

        Returns the iterator to keep comparing with, or `None` once further
        comparison is pointless (pyre2 raised, or either side ran out of
        matches).
        """
        try:
            re2_result = next(re2_result_iter, None)
        except Exception:
            self._warn('finditer', f"Pyre2 raised exception for regex: {self._re.pattern}")
            return None
        self._compare_matches('finditer', re_result, re2_result)
        if re_result is None or re2_result is None:
            return None
        return re2_result_iter

    def search(self, *args, **kwargs):
        """Return `re.Pattern.search(...)`; warn if `pyre2` disagrees."""
        re_result = self._re.search(*args, **kwargs)
        if self._re2 is None:
            return re_result
        try:
            re2_result = self._re2.search(*args, **kwargs)
        except Exception:
            # Nothing to compare against; one warning is enough.
            self._warn('search', f"Pyre2 raised exception for regex: {self._re.pattern}")
            return re_result
        self._compare_matches('search', re_result, re2_result)
        return re_result

    def sub(self, *args, **kwargs):
        """Return `re.Pattern.sub(...)`; warn if `pyre2` disagrees."""
        re_result = self._re.sub(*args, **kwargs)
        if self._re2 is None:
            return re_result
        try:
            re2_result = self._re2.sub(*args, **kwargs)
        except Exception:
            self._warn('sub', f"Pyre2 raised exception for regex: {self._re.pattern}")
            return re_result
        # Compare unconditionally: an empty string is a legitimate result and
        # must not hide a mismatch.
        if re_result != re2_result:
            self._warn('sub', f"Result not same for regex: {self._re.pattern}")
        return re_result

    def finditer(self, *args, **kwargs):
        """Yield every match of `re.Pattern.finditer(...)`.

        Each native match is compared with the corresponding `pyre2` match.
        The native matches are always yielded in full, even if `pyre2` is
        unavailable, raises, or finds a different number of matches.
        """
        re_result_iter = self._re.finditer(*args, **kwargs)
        re2_result_iter = None
        if self._re2 is not None:
            try:
                re2_result_iter = iter(self._re2.finditer(*args, **kwargs))
            except Exception:
                self._warn('finditer', f"Pyre2 raised exception for regex: {self._re.pattern}")

        for re_result in re_result_iter:
            if re2_result_iter is not None:
                re2_result_iter = self._compare_next(re_result, re2_result_iter)
            yield re_result

        # Native `re` is exhausted; warn if pyre2 still has matches left.
        if re2_result_iter is not None:
            self._compare_next(None, re2_result_iter)
def compile(pattern, flags=0):
    """Compile `pattern`, randomly routing some calls through pyre2.

    A call is sampled when `random.random()` is below `USAGE_PROBABILITY`.
    Unsampled calls return a plain `re` pattern. Sampled calls return the
    bare pyre2 pattern when `USE_ONLY_PYRE2` is set, and otherwise a
    `_ReOrPyrePattern` wrapper.
    """
    sampled = random.random() < USAGE_PROBABILITY
    if not sampled:
        return module_re.compile(pattern, flags)
    if not USE_ONLY_PYRE2:
        return _ReOrPyrePattern(pattern, flags)
    return module_re2.compile(pattern, flags)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment