nopeless/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Official python re examples are so bad

Read this small note before you go

The python regex structure is mostly this

re.<method> is the go to interface with common signature as (pattern, string, flags=0)

and most of the time there is an equivalent
Pattern.<method> with the first argument being the string instead of pattern

But Pattern.<method> takes optional [, pos[, endpos]] arguments (which might be useful) and has no flags=0, because the flags are fixed when the pattern is compiled

So I will omit all the equivalent methods' signatures
# Example
re.search(pattern, string, flags=0)
re.match(pattern, string, flags=0)
re.fullmatch(pattern, string, flags=0)
# this is how the signatures are changed
Pattern.search(string[, pos[, endpos]])
Pattern.match(string[, pos[, endpos]])
Pattern.fullmatch(string[, pos[, endpos]])
re.Match object


>>> m = re.search(r"c", "abcde")
>>> m
<re.Match object; span=(2, 3), match='c'>
>>> m.group(0) # this gets the whole match
'c'
>>> m = re.search(r"(c)", "abcde") # this one has a group
>>> m.group(1) # first group
'c'
>>> m.group(2) # if out of index
IndexError: no such group


Match


re.search("c",  "abcdef")  # <re.Match object; span=(2, 3), match='c'>
re.compile(r"c").search("abcdef") # <re.Match object; span=(2, 3), match='c'>

# Below two are equivalent
re.search("^c", "abcdef")  # None
re.match("c", "abcdef")  # None
# re.match implies ^ (strictly \A: even with re.MULTILINE it only matches at the very start of the string)
re.fullmatch("c", "abcdef") # None
# re.fullmatch implies ^$ (strictly \A...\Z: unlike $, it does not accept a trailing newline)
Signature

re.search(pattern, string, flags=0)
re.match(pattern, string, flags=0)
re.fullmatch(pattern, string, flags=0)
# this is how the signatures are changed
Pattern.search(string[, pos[, endpos]])
Pattern.match(string[, pos[, endpos]])
Pattern.fullmatch(string[, pos[, endpos]])

Match multiple


re.findall("c",  "cc")       # ["c", "c"]
re.findall("(c).", "cdce")      # ["c", "c"] - when only 1 group, return group
re.findall("(c)(.)", "cdce") # [("c", "d"), ("c", "e")] - when 2 or more, return tuple
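re.finditer("c", "cc")  # Lazy iterator of Match objects, one per non-overlapping match (findall returns strings/tuples instead)
# finditer already gives you multiple Match objects; the Bonus section shows a small wrapper around the same idea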
Signature

re.findall(pattern, string, flags=0)
re.finditer(pattern, string, flags=0)

Substitution


re.sub("c", "5", "abcde")  # "ab5de"
re.sub("c(.)", "\\1k", "abcde")  # "abdke" - \1 is the text captured by group 1
# when repl is a function, it is called with the Match object of each match
re.sub("c", lambda x: x.group(0).upper(), "abcde")  # "abCde"
re.subn("c", "5", "abcde") # ("ab5de", 1) # because it replaced once
Signature

re.sub(pattern, repl, string, count=0, flags=0)
re.subn(pattern, repl, string, count=0, flags=0)

Split - implies //g


# ['space', 'tab', 'newline', 'allworks']
re.split(r"\s+", "space tab\tnewline\nallworks")
Signature

re.split(pattern, string, maxsplit=0, flags=0)

Flags


# Only the useful ones. There is no g flag
re.IGNORECASE -> re.I
re.MULTILINE -> re.M
re.DOTALL -> re.S # "single line" mode: . also matches newline


Misc


r = re.compile(r"my\spattern", re.I | re.M)
r.match("whatever") # None

Bonus

# Match multiple Match objects
def match_multi(pattern, string, flags=0):
    """Yield a Match object for every non-overlapping match in ``string``.

    Args:
        pattern: A compiled pattern, or a ``str``/``bytes`` pattern that is
            compiled here.
        string: The text to scan from left to right.
        flags: Regex flags, applied only when ``pattern`` is ``str``/``bytes``.
            A compiled pattern already carries its own flags, so ``flags`` is
            ignored for it.

    Yields:
        One ``re.Match`` per match, in order of position.
    """
    import re  # local import so the snippet works when pasted on its own

    if isinstance(pattern, (str, bytes)):
        pattern = re.compile(pattern, flags)

    # finditer steps past zero-width matches on its own. Repeatedly calling
    # search(string, match.end()) never advances on an empty match and would
    # yield the same match forever.
    yield from pattern.finditer(string)

# Example code
# Print each captured name, padded to 10 columns, next to the Match repr
greeting_re = re.compile(r"hi (\w+)", re.IGNORECASE)
sample_text = "Hi Stewie, hi Joe, hi Peter"
for m in match_multi(greeting_re, sample_text):
    print(f"{m.group(1):10}| {repr(m)}")