Python's regex API mostly follows this structure:
re.<method>
is the go-to interface, with the common signature (pattern, string, flags=0),
and most of the time there is an equivalent
Pattern.<method>
whose first argument is the string
instead of the pattern.
The Pattern.<method> version
also accepts [, pos[, endpos]]
and has no flags=0 (flags are given to re.compile instead),
which might be useful.
So I will omit the signatures of all the equivalent Pattern methods.
# Example
re.search(pattern, string, flags=0)
re.match(pattern, string, flags=0)
re.fullmatch(pattern, string, flags=0)
# this is how the signatures are changed
Pattern.search(string[, pos[, endpos]])
Pattern.match(string[, pos[, endpos]])
Pattern.fullmatch(string[, pos[, endpos]])
>>> m = re.search(r"c", "abcde")
>>> m
<re.Match object; span=(2, 3), match='c'>
>>> m.group(0)  # this gets the whole match
'c'
>>> m = re.search(r"(c)", "abcde")  # this one has a group
>>> m.group(1)  # first group
'c'
>>> m.group(2)  # if out of index
IndexError: no such group
re.search("c", "abcdef")  # <re.Match object; span=(2, 3), match='c'>
re.compile(r"c").search("abcdef")  # <re.Match object; span=(2, 3), match='c'>
# Below two are equivalent
re.search("^c", "abcdef")  # None
re.match("c", "abcdef")  # None
# re.match implies ^
re.fullmatch("c", "abcdef")  # None
# re.fullmatch implies ^$
re.search(pattern, string, flags=0)
re.match(pattern, string, flags=0)
re.fullmatch(pattern, string, flags=0)
# this is how the signatures are changed
Pattern.search(string[, pos[, endpos]])
Pattern.match(string[, pos[, endpos]])
Pattern.fullmatch(string[, pos[, endpos]])
re.findall("c", "cc")  # ["c", "c"]
re.findall("(c).", "cdce")  # ["c", "c"] - when only 1 group, return group
re.findall("(c)(.)", "cdce")  # [("c", "d"), ("c", "e")] - when 2 or more, return tuple
re.finditer("c", "cc")  # Like findall, but returns an iterator of Match objects
# If you want to get multiple match objects go to the Bonus section
re.findall(pattern, string, flags=0)
re.finditer(pattern, string, flags=0)
re.sub("c", "5", "abcde")  # "ab5de"
re.sub("c(.)", "\\1k", "abcde")  # "abdke"
# when repl is a function, a Match object is always passed to it
re.sub("c", lambda x: x.group(0).upper(), "abcde")  # "abCde"
re.subn("c", "5", "abcde")  # ("ab5de", 1)
# because it replaced once
re.sub(pattern, repl, string, count=0, flags=0)
re.subn(pattern, repl, string, count=0, flags=0)
# ['space', 'tab', 'newline', 'allworks']
re.split(r"\s+", "space tab newline\nallworks")
re.split(pattern, string, maxsplit=0, flags=0)
# Only the useful ones. There is no g flag
re.IGNORECASE -> re.I
re.MULTILINE -> re.M
re.DOTALL -> re.S  # single line
r = re.compile(r"my\spattern", re.I | re.M)
r.match("whatever")  # None
# Match multiple Match objects
def match_multi(pattern, string, flags=0):
    """Yield every non-overlapping match of ``pattern`` in ``string``.

    Args:
        pattern: A compiled ``re.Pattern``, or a pattern given as ``str``
            or ``bytes``.
        string: The text to scan, left to right.
        flags: Regex flags, used only when ``pattern`` is given as a
            string; a compiled pattern already carries its own flags.

    Yields:
        ``re.Match`` objects in the order they occur in ``string``.
    """
    import re  # local import keeps this snippet self-contained

    if isinstance(pattern, (str, bytes)):
        pattern = re.compile(pattern, flags)
    # finditer steps past zero-width matches on its own. Repeatedly calling
    # search(string, match.end()) never advances after an empty match
    # (e.g. r"x*" or r"\b") and would loop forever.
    yield from pattern.finditer(string)
# Example code
regex = re.compile(r"hi (\w+)", flags=re.I)
for m in match_multi(regex, "Hi Stewie, hi Joe, hi Peter"):
    # Left-align the captured name in a 10-character column, then show
    # the repr of the Match object it came from.
    print(f"{m.group(1):10}| {m!r}")