Skip to content

Instantly share code, notes, and snippets.

@rectangletangle
Created May 27, 2014 23:56
Show Gist options
  • Save rectangletangle/9251b9c524aabf252774 to your computer and use it in GitHub Desktop.
Save rectangletangle/9251b9c524aabf252774 to your computer and use it in GitHub Desktop.
import re
# This is the literal code as copied from the file; for our purposes, it's equivalent to the code below.
"""
def extract_hashtag_values(text, pattern=settings.HASHTAG_PATTERN):
    for hashtag_match in re.finditer(pattern, text):
        yield hashtag_match.group(1)
"""
# This uses functions, list comprehensions, and regexes to do something useful and reusable.
def extract_hashtags(text):
    """Return the value of every hashtag in *text*, without the leading ``#``.

    A ``#`` only starts a hashtag when it sits at the very beginning of *text*
    or directly after a whitespace character, so ``a#b`` yields nothing. The
    tag must contain a run of at least two ASCII letters, digits, or hyphens,
    optionally surrounded by further word characters.

    Returns a list of strings in order of appearance (empty if nothing
    matches).
    """
    # This is a fairly complicated regex; usually they're simpler. The
    # complexity is necessary in order to catch a lot of "edge cases":
    # http://en.wikipedia.org/wiki/Edge_case
    #
    #   (?:(?<=\s)|^)   the '#' must follow whitespace or start the text
    #   #               the literal hash sign (not captured)
    #   (\w*[A-Za-z\d\-]{2,60}\w*)   group 1: the tag itself
    #
    # Raw string so the backslashes reach the regex engine untouched; in a
    # plain literal `\s`, `\w`, `\d` and `\-` are invalid string escapes.
    pattern = r'(?:(?<=\s)|^)#(\w*[A-Za-z\d\-]{2,60}\w*)'
    return [match.group(1) for match in re.finditer(pattern, text)]
def extract_phone_numbers(text):
    """Return every phone number in *text* written as ``(ddd)-ddd-dddd``.

    Only that exact layout matches: three digits in parentheses, a hyphen,
    three digits, a hyphen, four digits. A number without the parenthesised
    part (e.g. ``342-234-3245``) is ignored.

    Returns a list of the full matched strings in order of appearance (empty
    if nothing matches).
    """
    # A simpler regex doing a similar job: `\(` matches a literal `(`, and
    # `\d{3}` matches exactly three characters in 0-9. Raw string so the
    # backslashes are passed to the regex engine rather than being treated
    # as (invalid) string escapes.
    pattern = r'\(\d{3}\)-\d{3}-\d{4}'
    # `finditer` yields one match object per non-overlapping occurrence of
    # the pattern; group(0) is the whole matched text.
    return [match.group(0) for match in re.finditer(pattern, text)]
if __name__ == '__main__':
    # Demo run: call the hashtag function twice on different sample text,
    # printing each result. The expected output is shown after each call.
    print(extract_hashtags('some #hashtagged #text'))  # ['hashtagged', 'text']
    print(extract_hashtags('#foo bar #baz #1-2-3'))  # ['foo', 'baz', '1-2-3']
    # Notice that the number in the middle is skipped: without the
    # parenthesised first group it isn't an *exact* pattern match.
    print(extract_phone_numbers('(123)-456-7890 342-234-3245 (098)-765-4321'))  # ['(123)-456-7890', '(098)-765-4321']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment