Skip to content

Instantly share code, notes, and snippets.

@hodbby
Created February 13, 2012 07:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hodbby/1814587 to your computer and use it in GitHub Desktop.
Save hodbby/1814587 to your computer and use it in GitHub Desktop.
wordcount
import sys
def calculate_data(line_data):
    """Count how many times each word occurs.

    Args:
        line_data: An iterable of hashable items (normally a list of words).

    Returns:
        A dict mapping each distinct item to its number of occurrences.
        An empty iterable yields an empty dict.
    """
    # Named `counts` rather than `dict` so the builtin type is not shadowed.
    counts = {}
    for word in line_data:
        counts[word] = counts.get(word, 0) + 1
    return counts
def file_to_dict(file1):
    """Read an open text file and count its lower-cased words.

    Args:
        file1: A file-like object opened for reading text. It is consumed
            with a single ``read()`` call and is not closed here.

    Returns:
        The dict returned by ``calculate_data`` for the file's words,
        mapping each lower-cased word to its count.
    """
    data = file1.read().lower()
    # split() with no argument breaks on any run of whitespace (spaces, tabs,
    # newlines), so newlines need no special handling, repeated spaces do not
    # produce empty-string "words", and an empty file gives an empty list.
    words = data.split()
    return calculate_data(words)
def print_top(filename, limit=5):
    """Print the most frequent words of a file, one ``(word, count)`` per line.

    Entries are ordered by descending count; ties are broken by descending
    word. If the file has fewer distinct words than ``limit``, all of them
    are printed.

    Args:
        filename: Path of the text file to analyse.
        limit: Maximum number of entries to print; expected to be
            non-negative. Defaults to 5.

    Returns:
        ``filename``, unchanged.
    """
    # 'U' requests universal newlines on Python 2. Python 3 text mode does
    # this by default and rejects the flag from 3.11 onwards.
    mode = 'rU' if sys.version_info[0] == 2 else 'r'
    # The with-block closes the file even if reading or counting fails.
    with open(filename, mode) as file1:
        counts = file_to_dict(file1)

    ranked = sorted(
        counts.items(),
        key=lambda item: (item[1], item[0]),  # sort by count, then word
        reverse=True,
    )
    # Slicing (rather than indexing range(limit)) tolerates short inputs.
    for entry in ranked[:limit]:
        print(entry)
    return filename
def print_words(filename):
    """Print every word of a file with its count, sorted by word.

    The output is a single line: the sorted list of ``(word, count)`` tuples.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        ``filename``, unchanged.
    """
    # 'U' requests universal newlines on Python 2. Python 3 text mode does
    # this by default and rejects the flag from 3.11 onwards.
    mode = 'rU' if sys.version_info[0] == 2 else 'r'
    # The with-block closes the file even if reading or counting fails.
    with open(filename, mode) as file1:
        counts = file_to_dict(file1)

    print(sorted(counts.items()))
    return filename
def main():
    """Parse ``sys.argv`` and run the requested word-count report.

    Expects exactly two command-line arguments: an option (``--count`` or
    ``--topcount``) followed by a file name. ``--count`` calls
    ``print_words`` and ``--topcount`` calls ``print_top``.

    On a wrong number of arguments or an unknown option, a message is written
    to standard error and the process exits with status 1.
    """
    if len(sys.argv) != 3:
        # Diagnostics go to stderr so they never mix with report output.
        sys.stderr.write('usage: ./wordcount.py {--count | --topcount} file\n')
        sys.exit(1)

    option, filename = sys.argv[1:]
    if option == '--count':
        print_words(filename)
    elif option == '--topcount':
        print_top(filename)
    else:
        sys.stderr.write('unknown option: ' + option + '\n')
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment