Skip to content

Instantly share code, notes, and snippets.

@exbotanical
Created March 2, 2020 22:06
Show Gist options
  • Save exbotanical/cf61afec7d2e35046ac105dec64cc71d to your computer and use it in GitHub Desktop.
Save exbotanical/cf61afec7d2e35046ac105dec64cc71d to your computer and use it in GitHub Desktop.
python, [regex, re module] (more notes re py roborant)
# # # REGEX # # #
# The ? matches zero or one of the preceding group.
# The * matches zero or more of the preceding group.
# The + matches one or more of the preceding group.
# The {n} matches exactly n of the preceding group.
# The {n,} matches n or more of the preceding group.
# The {,m} matches 0 to m of the preceding group.
# The {n,m} matches at least n and at most m of the preceding group.
# {n,m}? or *? or +? performs a non-greedy match of the preceding group.
# ^spam means the string must begin with spam.
# spam$ means the string must end with spam.
# The . matches any character, except newline characters.
# \d, \w, and \s match a digit, word, or whitespace character (space, tab, newline), respectively.
# \D, \W, and \S match anything except a digit, word, or whitespace character, respectively.
# [abc] matches any character between the brackets (such as a, b, or c).
# [^abc] matches any character that isn’t between the brackets.
# def is_phone_number(txt):
# if len(txt) != 12:
# return False
# for i in range(0,3):
# if not txt[i].isdecimal:
# return False
# if txt[3] != '-':
# return False
# for i in range(4,7):
# if not txt[i].isdecimal:
# return False
# if txt[7] != '-':
# return False
# for i in range(8,12):
# if not txt[i].isdecimal:
# return False
# return True
# message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'
# for i in range(len(message)):
# chunk = message[i:i+12]
# if is_phone_number(chunk):
# print('Phone number found: ' + chunk)
# print('Done')
# import re
# num_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})')
# match = num_regex.search('Call me at 415-555-1011 tomorrow.')
# area_code = match.group(1)
# number = match.group(2)
# print(f'Phone number found: ({area_code}) {number}')
# ha_regex = re.compile(r'(((Ha){4})+)')
# match_ha = ha_regex.findall('I laughed like HaHaHa HaHaHa HaHa HAHaHaHaHaha HaHahaHaHaHA and else-like.')
# print(match_ha)
# phoneNumRegex = re.compile(r'(\d{3})-(\d{3})-(\d{4})')
# print(phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000'))
# # create own char class
# custom_char_class_regex = re.compile(r'[RNVE]\w+')
# b = custom_char_class_regex.findall('ReactJS, VueJS, NodeJS, ExpressJS, C++, Java')
# print(b) # oh, it returns only the keywords that match my skillset !
# begins_with_hello = re.compile(r'^Hello')
# print(begins_with_hello.search('Hello, world!'))
# print(begins_with_hello.search('I said hello.') == None)
# ends_with_number = re.compile(r'\d$')
# print(ends_with_number.search('Your number is 42'))
# # I always confuse the meanings of these two symbols, so I use the mnemonic “Carrots cost dollars” to remind myself that the caret comes first and the dollar sign comes last.
# wild_card = re.compile(r'.@gmail.com')
# print(wild_card.search('call that number or email hello@gmail.com'))
# atRegex = re.compile(r'.at')
# print(atRegex.findall('The cat in the hat sat on the flat mat.'))
import re
# Match a complete e-mail address: local part, "@", then a dotted domain.
# \w does not match ".", so the domain is written as one label followed by
# one or more ".label" parts; otherwise the match would stop before ".com".
# The group is non-capturing so findall() still returns whole addresses.
email_regex = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')
m = email_regex.findall('my email is abc@gmail.com and his is notarealemail@gmail.com and yours is email@email.com')
print(m)
# Greedy vs. non-greedy repetition, both run against the same five-"Ha" input.
laugh_text = 'HaHaHaHaHa'

# Default (greedy) quantifier: {3,5} takes as many repetitions as it can.
greedy_regex = re.compile(r'(Ha){3,5}')
mo1 = greedy_regex.search(laugh_text)

# A trailing ? makes the quantifier lazy: it stops at the minimum of three.
non_greedy_regex = re.compile(r'(Ha){3,5}?')
mo2 = non_greedy_regex.search(laugh_text)

# Print the greedy result first, then the lazy one.
for found in (mo1, mo2):
    print(found.group())
# Accept the area code either as "(713)" or as "713".
# Making each parenthesis independently optional (\(? ... \)?) would also
# accept unbalanced input such as "(713-214-5039" or "713)-214-5039", so the
# two balanced forms are spelled out as alternatives instead. The group is
# non-capturing so findall() keeps returning the whole matched number.
phone_regex = re.compile(r'(?:\(\d{3}\)|\d{3})-\d{3}-\d{4}')
l = phone_regex.findall('The first phone number is (713)-214-5039 and the second is 281-889-2034. The suite number is L-303')
print(l)
# With re.DOTALL the dot also matches "\n", so .* runs across all three lines
# instead of stopping at the end of the first one.
newline_regex = re.compile('.*', flags=re.DOTALL)
directives = 'Serve the public trust.\nProtect the innocent.\nUphold the law.'
dotall_match = newline_regex.search(directives)
o = dotall_match.group()
print(o)
# IGNORE CASE
# re.IGNORECASE (short name: re.I) lets the lowercase pattern match "RoboCop".
robocop = re.compile(r'robocop', re.IGNORECASE)
robocop_line = 'RoboCop is part man, part machine.'
print(robocop.search(robocop_line).group())
# sub() replaces every match of the pattern with the given text.
print(robocop.sub('An android', robocop_line))

# Censor agent names: group 1 captures the first letter of the name and \1
# re-inserts it in front of the asterisks.
agent_names_regex = re.compile(r'Agent (\w)\w*')
agent_report = 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.'
print(agent_names_regex.sub(r'\1****', agent_report))
# VERBOSE MODE EXAMPLE
# re.VERBOSE ignores whitespace and "#" comments inside the pattern, so a long
# regex can be laid out one component per line.
verbose_regex = re.compile(r'''(
(\d{3}|\(\d{3}\))?            # area code, with or without parentheses
(\s|-|\.)?                    # separator
\d{3}                         # first 3 digits
(\s|-|\.)                     # separator
\d{4}                         # last 4 digits
(\s*(ext\.?|x)\s*\d{2,5})?    # extension: ext, ext. or x -- the dot is escaped
                              # so it only matches a literal period
)''', re.VERBOSE)
# re.compile() takes a single flags argument; several flags are merged into
# that one value with the bitwise OR operator (|). re.I and re.S are the short
# names for re.IGNORECASE and re.DOTALL.
combined_flags = re.I | re.S | re.VERBOSE
multiple_arg_regex = re.compile('foo', combined_flags)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment