Skip to content

Instantly share code, notes, and snippets.

@yonglam
Created August 29, 2018 03:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yonglam/d50c05a264fdd738960cf124fb709958 to your computer and use it in GitHub Desktop.
Save yonglam/d50c05a264fdd738960cf124fb709958 to your computer and use it in GitHub Desktop.
'''
phone number regex
-------------------'''
def isPhoneNumber(text):
    """Return True if *text* has exactly the form ``ddd-ddd-dddd``.

    The check is done by hand (no regex): the string must be 12 characters
    long, have a hyphen at index 3 and index 7, and a decimal digit in every
    other position.

    Args:
        text: The candidate string to test.

    Returns:
        True when *text* is a phone number in ``ddd-ddd-dddd`` form,
        False otherwise.
    """
    if len(text) != 12:
        return False  # not phone number-sized
    if text[3] != '-' or text[7] != '-':
        return False  # separators missing or misplaced
    # Everything except the two hyphens must be a decimal digit.
    digits = text[0:3] + text[4:7] + text[8:12]
    return all(ch.isdecimal() for ch in digits)
# Sanity check: a well-formed number is recognized.
print(isPhoneNumber('415-555-1234'))

# Slide a 12-character window over the message and report every window
# that isPhoneNumber() accepts. Windows near the end are shorter than 12
# characters and are rejected by the length check.
message = "Call me at 415-444-1011 or at 903-772-3878"
foundNumber = False
for i in range(len(message)):
    chunk = message[i:i+12]
    if isPhoneNumber(chunk):
        print('Phone Number Found: ' + chunk)
        foundNumber = True
if not foundNumber:
    print('could not find a phone number')
'''--------------------------------------------------------'''
import re
# re.compile() needs a pattern argument (calling it with none raises
# TypeError), so compile the ddd-ddd-dddd phone pattern here.
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
message = "Call me at 415-444-1011 or at 903-772-3878"
''' --------------------------------------- '''
''' Regex Basics '''
''' --------------------------------------- '''
# build a regular expression object, stored in phoneNumberRegex
phoneNumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
# a Regex object has a search method, which returns a `match object`
mo = phoneNumberRegex.search(message)
print(mo.group())
# the findall method returns a list of all matches
print(phoneNumberRegex.findall(message))
# groups
# parentheses mark a group
phonReg = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
mo = phonReg.search(message)
print(mo.group())   # whole match
print(mo.group(1))  # first parenthesized group
print(mo.group(2))  # second parenthesized group
print(mo.group(3))  # third parenthesized group
# literal parentheses must be escaped with a backslash
message = 'my phone number is (813)-255-8812)'
phoneReg = re.compile(r'\(\d\d\d\)-\d\d\d-\d\d\d\d')
mo = phoneReg.search(message)
print(mo.group())
''' --------------------------------------- '''
''' Regex Logic '''
''' --------------------------------------- '''
# | alternation: matches any one of the listed suffixes
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())
# ? - the preceding group appears 0 or 1 times only
batRegex = re.compile(r'Bat(wo)?man')
mo = batRegex.search('The adventures of Batman')
moo = batRegex.search('Batwoman loves batman')
print(mo.group())
print(moo.group())
# looks for an area code, but still matches when there is no area code
# (raw string so that \d is not treated as a string escape sequence)
phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo = phoneRegex.search("My phone number is 555-8821")
print(mo.group())
# * - 0 or more times
batRegex = re.compile(r'Bat(wo)*man')
mo = batRegex.search('Batwowowowowoman')
print(mo.group())
# + - once or more
batRegex = re.compile(r'Bat(wo)+man')
mo = batRegex.search("Adventures of Batwoman")
print(mo.group())
mo = batRegex.search("Batwowowowoman")
print(mo.group())
''' --------------------------------------- '''
''' Regex Groups '''
''' --------------------------------------- '''
# {n} - the preceding group repeated exactly n times
haRegex = re.compile(r'(ha){3}')
mo = haRegex.search("he said 'hahaha'")
print(mo.group())
# match three phone numbers in a row; each may have an optional area code
# and may be followed by an optional comma and an optional space
PhoneReg = re.compile(r"((\d\d\d-)?\d\d\d-\d\d\d\d(,)?( )?){3}")
mo = PhoneReg.search('phone numbers 888-888-8888, 233-111-2232 113-1212')
print(mo.group())
# {min,max} - repetition range
hareg = re.compile(r'(ha){3,5}')
mo = hareg.search('hahaha')
print(mo.group())
mo = hareg.search('hahahahaha')
print(mo.group())
hareg = re.compile(r'(ha){,5}')  # same as 0-5
hareg = re.compile(r'(ha){3,}')  # 3 or more
# greedy (default)
digitRegex = re.compile(r'(\d){3,5}')
mo = digitRegex.search('1234567890')
print(mo.group())  # returns the maximum number of characters allowed (5)
# non-greedy: a trailing ? makes the repetition match as few as possible (3)
digitRegex = re.compile(r'(\d){3,5}?')
mo = digitRegex.search('0123456789')
print(mo.group())
'''---------------------------------------------'''
''' Find All '''
'''---------------------------------------------'''
# Use a message that actually contains ddd-ddd-dddd numbers; the message
# with the parenthesized area code does not match this pattern, so
# findall() would return an empty list for it.
message = "Call me at 415-444-1011 or at 903-772-3878"
phoneReg = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
grou = phoneReg.findall(message)  # no groups: list of matched strings
# findall with grouping: returns a list of tuples, one item per group
phoneReg = re.compile(r'(\d\d\d-)(\d\d\d-\d\d\d\d)')
# phoneReg.findall(message) -> [('415-', '444-1011'), ('903-', '772-3878')]
'''---------------------------------------------'''
''' Character Classes '''
'''---------------------------------------------'''
# \d - matches any character that is a numeric digit
# \D - matches any character that is NOT a numeric digit from 0 to 9
# \w - matches any letter, numeric digit, or the underscore character
# \W - matches any character that is not a letter, number or _
# \s - matches any space, tab, or newline character
# \S - matches any character that is not a space, tab, or newline
christmas = ''' 12 lords leaping
11 ladies dancing
10 pipers piping
9 drummers drumming
8 maids milking
7 swans swimming
6 geese laying
5 gold rings
4 colly birds
3 french hens
2 turtle doves and
1 partridge in a pear tree '''
# one or more digits, followed by one whitespace character, followed by
# one or more word characters
xmas = re.compile(r'\d+\s\w+')
print(xmas.findall(christmas))
'''---------------------------------------------'''
''' Custom Character Classes '''
'''---------------------------------------------'''
# Square brackets define a custom character class. Each assignment below
# replaces the previous one; only the last two patterns are actually used.
regexObj = re.compile(r'[aeiou]')  # finds lowercase vowels
regexObj = re.compile(r'[a-z]')  # finds all lowercase letters from a-z
regexObj = re.compile(r'[a-fA-F]')  # finds a-f in lowercase and capital
regexObj = re.compile(r'[aeiouAEIOU]')  # finds lowercase and capital vowels
print(regexObj.findall('robocop eats baby food'))
# ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o']
regexObj = re.compile('[aeiouAEIOU]{2}')  # match 2 vowels in a row
print(regexObj.findall('robocop eats baby food'))
# ['ea', 'oo']
'''---------------------------------------------'''
''' Negative Character Classes '''
'''---------------------------------------------'''
# A leading ^ inside the brackets negates the class: this matches any
# character that is NOT a vowel (spaces included).
contsReg = re.compile(r'[^aeiouAEIOU]')
print(contsReg.findall('robo cop eats babyfood'))
#['r', 'b', ' ', 'c', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', 'f', 'd']
import re
''' ----------------------------- '''
''' Regex .* ^ $ '''
''' ----------------------------- '''
# ^ - matches strings that begin with the pattern
# $ - matches strings that end with the pattern
# . - matches any character except newline
# * - matches zero or more of the preceding item
# .* - matches any character, any number of times, except newline
# default is greedy
# .*? - non-greedy dot-star expression
# re.compile(r'.*', re.DOTALL) - matches ALL characters, newlines included
# re.compile(r'[aeiou]', re.IGNORECASE) - ignores case
# re.compile(r'[aeiou]', re.I) - same as above
beginHelloRegex = re.compile(r'^Hello')  # match string beginning with Hello
mo = beginHelloRegex.search('Hello how are you')
print(mo.group())
endHelloRegex = re.compile(r'world!$')  # matches string that ends with `world!`
mo = endHelloRegex.search('hello world!')
print(mo.group())
allDigitsRegex = re.compile(r'^\d+$')  # digits only, from beginning to end
mo = allDigitsRegex.search('651652166262')
print(mo.group())
atRegex = re.compile(r'.at')  # any single character followed by "at"
print(atRegex.findall("The cat in the hat sat on the flat mat"))
string = "First Name: John Last Name: Smith"
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
print(nameRegex.findall(string))
serve = "<To serve humans> for dinner.>"
# greedy: .* runs as far as possible, up to the LAST '>'
greedy = re.compile(r'<(.*)>')
mo = greedy.search(serve)
print(mo.group())
# non-greedy: .*? stops at the FIRST '>'
nongreedRegex = re.compile('<(.*?)>')
mo = nongreedRegex.search(serve)
print(mo.group())
# .* matches up to (but not including) the first \n character
string = 'Serve the public trust. \n Protect the innocent \n Upload the law'
newLineRegex = re.compile('.*')
mo = newLineRegex.search(string)
print(mo.group())
# with re.DOTALL the dot matches all characters, even newlines
string = 'Serve the public trust. \n Protect the innocent \n Upload the law'
allCharRegex = re.compile('.*', re.DOTALL)
mo = allCharRegex.search(string)
print(mo.group())
# Every vowel in this sample is uppercase, so the case-sensitive pattern
# finds nothing while the case-insensitive one finds all five.
string = "AbcdEfghIjklmnOpqrstUvwxyz"
caseSenReg = re.compile(r'[aeiou]')
print(caseSenReg.findall(string))
caseInSenReg = re.compile(r'[aeiou]', re.IGNORECASE)
print(caseInSenReg.findall(string))
import re
''' -------------------------------- '''
''' re.sub '''
''' -------------------------------- '''
# "Agent " followed by one or more word characters (the agent's name)
namesRegex = re.compile(r'Agent \w+')
print(namesRegex.findall("Agent Alice gave the secret documents to Agent Bob."))
# sub() replaces every match with the given replacement text
print(namesRegex.sub('REDACTED', "Agent Alice gave the secret documents to Agent Bob."))
# the group captures only the first character of the name; the rest of the
# name is matched but not captured
namesRegex = re.compile(r'Agent (\w)\w*')
print(namesRegex.findall("Agent Alice gave the secret documents to Agent Bob."))
# \1 in the replacement inserts the text captured by group 1
print(namesRegex.sub(r'AGENT \1****', "Agent Alice gave the secret documents to Agent Bob."))
''' -------------------------------- '''
''' re.verbose '''
''' -------------------------------- '''
# re.VERBOSE allows the pattern to span several lines and to contain
# comments; whitespace and #-comments inside the pattern are ignored.
# The compiled pattern is bound to a name so it can actually be used.
verbosePhoneRegex = re.compile(r'''
\d\d\d- #area code
\d\d\d-
\d\d\d\d''', re.VERBOSE)
''' -------------------------------- '''
''' Bitwise Comparison '''
''' -------------------------------- '''
# Several flags are combined into one argument with the bitwise OR
# operator (|). The pattern is a raw string so that \d is not treated as
# a string escape sequence, and the result is bound to a name so it can
# be used.
multiFlagRegex = re.compile(r'\d\d\d', re.IGNORECASE | re.DOTALL | re.VERBOSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment