Last active
February 27, 2020 18:29
-
-
Save SerhatTeker/a252e6bb466a067799e42d7c87bbead4 to your computer and use it in GitHub Desktop.
Word Count Regex - Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import re
# A word is a run of digits and ASCII letters, optionally followed by a single
# apostrophe-joined suffix (as in "can't"). The text is lowercased before
# matching, so the character class needs no upper-case range.
_WORD_PATTERN = re.compile(r"[\da-z]+(?:'[\da-z]+)?")


def count_words(sentence):
    """Count how often each word occurs in *sentence*, ignoring case.

    The sentence is lowercased and then scanned with ``_WORD_PATTERN``.
    Apostrophes are kept only when they join two alphanumeric runs, so
    surrounding quotes are dropped ("'large'" is counted as "large") while
    contractions stay intact ("can't").

    Args:
        sentence: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each lowercased word to the number
        of times it appears.
    """
    return collections.Counter(_WORD_PATTERN.findall(sentence.lower()))
# Alternative
#------------------------------------------------------------------------------
def unquoted(word):
    """Return *word* without the single quotes that wrap it.

    Quotes are removed only when the word both starts and ends with an
    apostrophe; a word quoted on one side only (or not at all) is returned
    unchanged. Apostrophes inside the word are preserved, so "'don't'"
    becomes "don't".

    Args:
        word: A single token, possibly wrapped in single quotes.

    Returns:
        The token with its wrapping apostrophes stripped. A token made up
        solely of apostrophes yields the empty string.
    """
    if word.startswith("'") and word.endswith("'"):
        # strip() only touches the ends, so inner apostrophes survive and an
        # all-apostrophe token collapses to "" instead of raising IndexError.
        return word.strip("'")
    return word
def count_words_alternative(sentence):
    """Count word occurrences in *sentence* using a simpler pattern.

    The sentence is lowercased and split into tokens that are either runs of
    letters/apostrophes or runs of digits. Each token is passed through
    ``unquoted`` before being counted.

    Args:
        sentence: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences.
    """
    # regex is kept simpler to improve readability; the input is already
    # lowercased, so only the lower-case letter range is needed.
    tokens = re.findall(r"[a-z']+|[0-9]+", sentence.lower())
    # The letter class also matches stray apostrophes on their own (e.g. in
    # "hello ' world"); such tokens contain no word, so skip them rather than
    # hand them to unquoted().
    return collections.Counter(
        unquoted(token) for token in tokens if token.strip("'")
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Derived from the solution of https://github.com/exercism/python/tree/b89880740cc65fc07c2ba2beeefa18cf4cebcebe/exercises/word-count.