Created
November 7, 2016 06:29
-
-
Save kmdarshan/ea10fa9c6312a88e55f5b2a3dad38146 to your computer and use it in GitHub Desktop.
Count the number of words of each type in a sentence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Count words.""" | |
def count_words(s, n):
    """Return the n most frequently occurring words in s.

    Words are the whitespace-separated tokens of ``s``; matching is
    case-sensitive and punctuation is not stripped.

    Args:
        s: The text to analyse.
        n: Maximum number of (word, count) pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count,
        with ties broken alphabetically by word.  The ranked list is
        truncated with ``[:n]``, so if ``s`` has fewer than ``n``
        distinct words all of them are returned.
    """
    # Imported locally because the file has no module-level import section.
    from collections import Counter

    # split() with no argument collapses runs of whitespace, so repeated
    # spaces or an empty string never produce an empty "word".
    counts = Counter(s.split())

    # Negating the count gives descending frequency while the word itself
    # keeps ascending alphabetical order among ties.
    ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    return ranked[:n]
def test_run():
    """Print the result of count_words() for a few sample sentences."""
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # unlike the Python 2-only print statement.
    print(count_words("cat bat mat cat bat cat", 3))
    print(count_words("betty bought a bit of butter but the butter was bitter", 3))
    print(count_words('london bridge is falling down falling down falling down london bridge is falling down my fair lady', 5))
# Run the sample inputs only when the file is executed as a script.
if __name__ == '__main__':
    test_run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment