Skip to content

Instantly share code, notes, and snippets.

@kmdarshan
Created November 7, 2016 06:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmdarshan/ea10fa9c6312a88e55f5b2a3dad38146 to your computer and use it in GitHub Desktop.
Save kmdarshan/ea10fa9c6312a88e55f5b2a3dad38146 to your computer and use it in GitHub Desktop.
Count how many times each word occurs in a sentence and list the n most frequent words
"""Count words."""
def count_words(s, n):
    """Return the n most frequently occurring words in s.

    Words are the whitespace-separated tokens of ``s``. Matching is
    case-sensitive and punctuation is not stripped, so ``"Cat"``, ``"cat"``
    and ``"cat,"`` are counted as three different words.

    Args:
        s: The text to analyse.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count, with
        ties broken alphabetically by word. If ``s`` contains fewer than
        ``n`` distinct words, all of them are returned; an empty or
        whitespace-only ``s`` yields an empty list.
    """
    # Imported locally because the file has no module-level import block.
    from collections import Counter

    # split() with no argument collapses runs of whitespace, so repeated
    # spaces (or an empty string) never produce '' as a counted "word".
    counts = Counter(s.split())

    # Negating the count sorts frequencies high-to-low while the word itself
    # still sorts low-to-high, giving alphabetical order within a tie.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:n]
def test_run():
    """Test count_words() with some inputs.

    Prints the result of each sample call to standard output; nothing is
    asserted or returned.
    """
    # print(...) with a single parenthesised argument produces the same
    # output on Python 2 and Python 3, unlike the bare print statement.
    print(count_words("cat bat mat cat bat cat", 3))
    print(count_words("betty bought a bit of butter but the butter was bitter", 3))
    print(count_words('london bridge is falling down falling down falling down london bridge is falling down my fair lady', 5))
# Run the sample inputs only when executed as a script, not when imported.
if __name__ == '__main__':
    test_run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment