Skip to content

Instantly share code, notes, and snippets.

@johntmyers
Last active May 22, 2020 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johntmyers/0a7b6cc44a87ab9adce1eaf449db2806 to your computer and use it in GitHub Desktop.
Save johntmyers/0a7b6cc44a87ab9adce1eaf449db2806 to your computer and use it in GitHub Desktop.
String to regex converter
import re
def _sub_special_chars_any(in_string: str) -> str:
    """Replace every special character in *in_string* with the regex ``.?``.

    A character counts as "special" when it is not a lowercase ASCII letter,
    a digit, or whitespace. Uppercase letters are therefore replaced too, so
    lowercase the input beforehand if they should survive.
    """
    # Raw string: a plain "\d" / "\s" is an invalid string escape that newer
    # Python versions warn about at compile time.
    return re.sub(r"[^a-z\d\s]", ".?", in_string)
def _mark_substrings_optional(in_string: str, n=3):
    """Build a regex in which short trailing words of *in_string* are optional.

    The string is split on whitespace. Every word after the first that has at
    most *n* lowercase-letter/digit characters is wrapped as an optional group
    with optional surrounding whitespace; longer words are joined to the
    preceding text with a mandatory whitespace token. The first word is never
    made optional (e.g. the "AB" in "AB Corporation").

    Args:
        in_string: Text (or partial pattern) to convert; expected to be
            lowercase already, since only ``a-z`` and digits are counted.
        n: Maximum number of letters/digits for a word to be made optional.

    Returns:
        The regex pattern as a string. A single-word input is returned
        wrapped in word boundaries; an empty or whitespace-only input
        yields an empty string.
    """
    parts = in_string.split()
    if not parts:
        # Nothing to convert; previously this fell through to parts[0] and
        # raised IndexError.
        return ""
    if len(parts) == 1:
        # Use the stripped token so stray leading/trailing whitespace does
        # not end up between the word and its boundary assertions.
        return r"\b" + parts[0] + r"\b"

    # The first word is always kept as a mandatory literal.
    pieces = [parts[0]]
    for part in parts[1:]:
        # Count only the letters/digits so injected ".?" tokens are ignored.
        alnum_count = len(re.sub(r"[^a-z\d]", "", part))
        if alnum_count <= n:
            # Optional word; it brings its own optional whitespace, so no
            # separator is injected in front of it.
            pieces.append(rf"\s?({part})?\s?")
        else:
            # Mandatory word, separated by a whitespace token.
            pieces.append(r"\s" + part)
    return "".join(pieces)
def convert(in_string: str):
    """Print and return a loose regex pattern derived from *in_string*.

    The input is lowercased, passed through ``_sub_special_chars_any`` and
    then through ``_mark_substrings_optional`` with its default threshold.
    The resulting pattern is printed (as before) and also returned so that
    callers can use it programmatically.
    """
    pattern = _mark_substrings_optional(_sub_special_chars_any(in_string.lower()))
    print(pattern)
    return pattern
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment