Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
playing with python's `collections.Counter`
"""
Use a Counter to find the most common words in "The Wonderful Wizard of Oz" by
L. Frank Baum.
Available in plain text at:
https://ia700500.us.archive.org/2/items/thewonderfulwiza00055gut/wizoz10.txt
short link: http://bit.ly/thewonderfulwizard
Note: This code also counts the words in the header, so it's not a *realistic*
application, but more of a demonstration of Python's Counter.
Running this code should give you something like this:
$ python count_words.py
The Top 10 words
the: 2808
and: 1630
to: 1143
of: 869
a: 819
I: 597
was: 502
you: 486
in: 476
he: 408
"""
from collections import Counter
import re
import urllib # for more pleasant http, use http://bit.ly/python-requests
def main(n=10, url='http://bit.ly/thewonderfulwizard'):
    """Print the ``n`` most common words of the text served at ``url``.

    The text is downloaded, every run of whitespace is condensed to a single
    space, every character that is not an ASCII letter or a space is removed,
    and the remaining space-separated words are tallied with a ``Counter``.
    Counting is case-sensitive, so "The" and "the" are separate words, and
    punctuation is deleted rather than replaced, so "don't" is counted as
    "dont".

    Args:
        n: How many of the most frequent words to print.
        url: Location of the text to analyse. Any scheme understood by
            ``urlopen`` works (``http://``, ``https://``, ``file://``).
            NOTE(review): the default short link may no longer resolve;
            pass another URL if the download fails.

    Returns:
        None. A header line followed by one ``word: count`` line per word
        is written to standard output.
    """
    # Imported here because the file-level ``import urllib`` does not expose
    # ``urlopen`` on Python 3 (it lives in ``urllib.request``); the fallback
    # keeps the script usable on Python 2.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2

    # Download the content; ``closing`` guarantees the connection is released
    # even if reading fails.
    with closing(urlopen(url)) as response:
        raw = response.read()

    # Decode once at the I/O boundary so the regexes below operate on text.
    # Undecodable bytes become U+FFFD, which the non-alpha filter strips.
    content = raw.decode('utf-8', 'replace')

    # Clean the content a little
    content = re.sub(r'\s+', ' ', content)  # condense all whitespace
    content = re.sub(r'[^A-Za-z ]+', '', content)  # remove non-alpha chars
    words = content.split()

    # Start counting
    word_count = Counter(words)

    # The Top-N words
    print("The Top {0} words".format(n))
    for word, count in word_count.most_common(n):
        print("{0}: {1}".format(word, count))
# Script entry point: run the demo with its default settings, but only when
# this file is executed directly rather than imported.
if __name__ == "__main__":
    main()
"""
Playing with Python's `Counter`
- it's like a dictionary
- values can be positive/negative integers
- keys correspond to the things you want to count
"""
>>> from collections import Counter
>>> c = Counter() # Create a Counter
>>> c['widgets'] += 1 # start counting 'widgets'
>>> c
Counter({'widgets': 1})
# (most) regular dict methods are available
>>> c.keys()
['widgets']
>>> c.values()
[1]
>>> 'widgets' in c
True
# `update` will create new keys or adjust the counts for
# existing keys
>>> c.update({'foo': 1})
>>> c
Counter({'widgets': 1, 'foo': 1})
# calling `update` again will increment the value of 'foo'
>>> c.update({'foo': 1})
>>> c
Counter({'widgets': 1, 'foo': 2})
# You can create a Counter from an iterable
>>> c = Counter(['larry', 'moe', 'curly'])
>>> c
Counter({'larry': 1, 'curly': 1, 'moe': 1})
# Or you can pass in keyword args
>>> c = Counter(ravens=34, niners=31)
>>> c
Counter({'ravens': 34, 'niners': 31})
# `elements` gives you an iterator that yields each `key` repeated
# `count` times. (Here the counter is again created from an iterable.)
>>> colors = ['red', 'blue', 'yellow']
>>> c = Counter(colors)
>>> c
Counter({'blue': 1, 'yellow': 1, 'red': 1})
>>> c['red'] += 2 # Three 'red's
>>> c['blue'] += 1 # Two 'blue's
>>> c
Counter({'red': 3, 'blue': 2, 'yellow': 1})
>>> list(c.elements())
['blue', 'blue', 'yellow', 'red', 'red', 'red']
# Finding the N "most common" elements
>>> c.most_common(2)
[('red', 3), ('blue', 2)]
# Trick: Find the most common letters in a string:
>>> Counter('supercalifragilisticexpialidocious').most_common(3)
[('i', 7), ('a', 3), ('c', 3)]
# Subtracting counts
>>> money = {'gold': 1001, 'silver': 501, 'copper': 101}
>>> shield = {'gold': 25}
>>> sword = {'gold': 100, 'silver':50}
# initialize your bank
>>> c = Counter(money)
>>> c
Counter({'gold': 1001, 'silver': 501, 'copper': 101})
# Buy a shield
>>> c.subtract(shield)
>>> c
Counter({'gold': 976, 'silver': 501, 'copper': 101})
# Buy a sword
>>> c.subtract(sword)
>>> c
Counter({'gold': 876, 'silver': 451, 'copper': 101})
# Buy a Castle!
>>> castle = {'gold': 50000, 'silver': 9999, 'copper': 350}
>>> c.subtract(castle)
>>> c
Counter({'copper': -249, 'silver': -9548, 'gold': -49124})
# oops!
# start over!
>>> c.clear()
>>> c
Counter()
@salihkaragoz

This comment has been minimized.

Copy link

salihkaragoz commented Feb 2, 2018

Thank you for sharing this code.
Can you update the download links? It seems like broken.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.