Skip to content

Instantly share code, notes, and snippets.

@msuprun
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save msuprun/9885235 to your computer and use it in GitHub Desktop.
Save msuprun/9885235 to your computer and use it in GitHub Desktop.
Hamlet word count SC-NYU-Bootcamp 2014 - Python
Python
Hamlet example
data here: http://www.gutenberg.org/cache/epub/2265/pg2265.txt
def read_Hamlet():
    '''Open hamlet.txt (relative to the working directory) for reading.

    Returns the open file object; the caller is responsible for closing it.
    '''
    return open("hamlet.txt")
# Count how often each whitespace-separated token occurs in the file and
# remember the most frequent one (maxKey) together with its count (maxCount).
my_file = read_Hamlet()
wordCounts = dict()
maxCount = 0
maxKey = None
try:
    for line in my_file:
        words = line.split()
        print(words)
        for word in words:
            # dict.get replaces the `word in wordCounts.keys()` test, which
            # builds and scans a list for every word on Python 2.
            wordCounts[word] = wordCounts.get(word, 0) + 1
            # Strict '>' means the first word to reach a count keeps the lead.
            if wordCounts[word] > maxCount:
                maxCount = wordCounts[word]
                maxKey = word
finally:
    # Release the file handle even if reading or printing fails part-way.
    my_file.close()
Function-ified
#functions we defined
def read_Hamlet(i):
    '''Read hamlet.txt and return its contents as a list of lines.

    Args:
        i: Any printable value. It is only echoed to stdout and does not
            influence what is read (the original note expected a string
            or an int).

    Returns:
        The list produced by ``readlines()``; line endings are kept.
    '''
    print(i)
    # The with-block closes the file even when readlines() raises.
    with open("hamlet.txt") as my_file:
        theLines = my_file.readlines()
    print(type(theLines))
    return theLines
def initializeDictionary():
    '''Build a fresh results dictionary ready for updating.

    The returned dict starts with three bookkeeping entries:
    'MAXCOUNT' (0), 'MAXKEY' (None) and 'NUMWORDS' (0).
    '''
    return {
        'MAXCOUNT': 0,
        'MAXKEY': None,
        'NUMWORDS': 0,
    }
def updateDictionary(wordCounts, word):
    '''Record one occurrence of word in wordCounts and refresh the maximum.

    Args:
        wordCounts: Dictionary holding per-word counts plus the bookkeeping
            keys 'MAXCOUNT' and 'MAXKEY'. It is modified in place.
        word: The key whose count is incremented (created at 1 if absent).

    Returns:
        None.

    Note:
        Words share the dictionary with the bookkeeping keys, so a word
        spelled exactly like one of them would collide with it. The
        'NUMWORDS' entry is not touched by this function.
    '''
    # dict.get replaces `word in wordCounts.keys()`, which builds and scans
    # a list on every call under Python 2.
    newCount = wordCounts.get(word, 0) + 1
    wordCounts[word] = newCount
    # Strict '>' means a tie never displaces the current MAXKEY.
    if newCount > wordCounts['MAXCOUNT']:
        wordCounts['MAXCOUNT'] = newCount
        wordCounts['MAXKEY'] = word
def getWords(line):
    '''Return the lower-cased, whitespace-separated pieces of line as a list.'''
    return line.lower().split()
#main script
# Read the lines, then feed every lower-cased word into the counts dictionary.
listOfLines = read_Hamlet('This is Elenas example.')
dictOfWordCounts = initializeDictionary()
for line in listOfLines:
    words = getWords(line)
    for word in words:
        updateDictionary(dictOfWordCounts, word)
# Sanity check: show the first few keys. The bare expression only echoed in an
# interactive session; list() keeps the slice valid when keys() is a view.
print(list(dictOfWordCounts.keys())[0:10])
def download_webpage(url):
    '''Download a webpage and return its content.

    Args:
        url: Address to fetch, passed unchanged to ``urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields (the raw body).

    Errors raised by ``urlopen`` or ``read`` propagate to the caller.
    '''
    # Imported locally because the file never imports urllib2; the fallback
    # covers Python 3, where urlopen lives in urllib.request.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    from contextlib import closing

    # closing() guarantees the response is released once it has been read.
    with closing(urlopen(url)) as page:
        return page.read()
# Fetch the page at hamlet_url and print whatever download_webpage returns.
# NOTE(review): the URL presumably serves the text of Hamlet — confirm it is still live.
hamlet_url = 'http://sydney.edu.au/engineering/it/~matty/Shakespeare/texts/tragedies/hamlet'
print(download_webpage(hamlet_url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment