Skip to content

Instantly share code, notes, and snippets.

@thuwarakeshm
Last active August 9, 2022 00:50
Show Gist options
  • Save thuwarakeshm/9c1f427c5fe9e7aca5df563f3218a984 to your computer and use it in GitHub Desktop.
Save thuwarakeshm/9c1f427c5fe9e7aca5df563f3218a984 to your computer and use it in GitHub Desktop.
Regex with PRegEx
from pregex.classes import AnyButWhitespace
from pregex.groups import Capture
from pregex.quantifiers import OneOrMore, AtLeastAtMost
# Build the pattern from named pieces: a run of non-whitespace characters,
# an "@", and a captured part made of another non-whitespace run, a dot and
# two to three further non-whitespace characters.
local_part = OneOrMore(AnyButWhitespace())
domain_name = OneOrMore(AnyButWhitespace())
domain_suffix = AtLeastAtMost(AnyButWhitespace(), 2, 3)
pattern = local_part + "@" + Capture(domain_name + "." + domain_suffix)

# Sample text containing three addresses.
text = """My names is Alice. I live in Wonderland. You can mail me: alice@wonderland.com.
In case if I couldn't reply, please main my friend the White Rabbit: whiterabbit@wonderland.com.
But for more serious issues, you should main Tony Stark at tony@stark.org.
"""

# Only the captured groups (the part after the "@").
pattern.get_captures(text)
# [('wonderland.com',), ('wonderland.com',), ('stark.org',)]

# The full matches.
pattern.get_matches(text)
# ['alice@wonderland.com', 'whiterabbit@wonderland.com', 'tony@stark.org']
from pregex.classes import AnyButWhitespace
from pregex.groups import Capture
from pregex.operators import Either
from pregex.quantifiers import OneOrMore
# A run of non-whitespace characters, an "@", and a captured part that must
# end in either ".com" or ".org".
user_part = OneOrMore(AnyButWhitespace())
allowed_suffix = Either(".com", ".org")
domain = Capture(OneOrMore(AnyButWhitespace()) + allowed_suffix)
pattern = user_part + "@" + domain

# Sample text: the last address ends in ".err", which is not an allowed suffix.
text = """My names is Alice. I live in Wonderland. You can mail me: alice@wonderland.com.
In case if I couldn't reply, please main my friend the White Rabbit: whiterabbit@wonderland.com.
But for more serious issues, you should main Tony Stark at tony@stark.org.
Please don't message thanos@wierdland.err
"""

# Captured groups for every match in the text.
pattern.get_captures(text)
# [('wonderland.com',), ('wonderland.com',), ('stark.org',)]
from pregex.classes import AnyDigit
from pregex.quantifiers import Exactly, Optional
# Exactly five digits, optionally followed by a hyphen and exactly four digits.
five_digits = Exactly(AnyDigit(), 5)
hyphen_plus_four = Optional("-" + Exactly(AnyDigit(), 4))
pattern = five_digits + hyphen_plus_four

# One address with the short form and one with the hyphenated form.
address1 = "730 S White Sands Blvd, Alamogordo, NM 88310, United States"
address2 = "730 S White Sands Blvd, Alamogordo, NM 88310-7421, United States"

pattern.get_matches(address1)
# ['88310']

pattern.get_matches(address2)
# ['88310-7421']
import re
# Five digits, optionally followed by a hyphen and four more digits.
pattern = r"\d{5}(-\d{4})?"
address = "730 S White Sands Blvd, Alamogordo, NM 88310, United States"

# Locate the first occurrence in the address, then take the whole matched text.
zip_match = re.search(pattern, address)
zip_code = zip_match.group()

print(zip_code)
# 88310
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment