Skip to content

Instantly share code, notes, and snippets.

@sashiyama
Created June 30, 2018 00:56
Show Gist options
  • Save sashiyama/a10da1bfb0846078d75b9250533c1632 to your computer and use it in GitHub Desktop.
Save sashiyama/a10da1bfb0846078d75b9250533c1632 to your computer and use it in GitHub Desktop.
wc.py
import re
import sys
from collections import Counter
def count_word(lines):
    """Count word occurrences across an iterable of text lines.

    Each line has the characters matched by the separator pattern (line
    endings and common punctuation) replaced with spaces, is lower-cased,
    and is then split on whitespace. Every resulting token counts as one
    word.

    Args:
        lines: Iterable of strings, e.g. the result of ``readlines()``.

    Returns:
        A ``(frequencies, total)`` tuple. ``frequencies`` is a list of
        ``(word, count)`` tuples sorted by descending count, with ties kept
        in order of first appearance. ``total`` is the overall number of
        words counted.
    """
    # Compile once up front rather than resolving the pattern per line.
    separators = re.compile(
        r'\r\n|\n|\.|,|-|\"|\'|\[|\]|_|\*|:|\;|\(|\)|\/|\&|!|\?'
    )
    counts = Counter()
    total = 0
    for line in lines:
        tokens = separators.sub(' ', line).lower().split()
        total += len(tokens)
        counts.update(tokens)
    # most_common() sorts stably by count, so words with equal counts stay
    # in first-seen order.
    return counts.most_common(), total
def main(argv=None):
    """Print the word total and the 20 most frequent words of the input.

    With no command-line argument the text is read from standard input.
    With exactly one argument it is read from the file at that path. Any
    other argument count exits with a usage message.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv``.
    """
    if argv is None:
        argv = sys.argv

    argc = len(argv)
    if argc == 1:
        lines = sys.stdin.readlines()
    elif argc == 2:
        try:
            # Plain 'r' already translates newlines in Python 3 text mode;
            # the legacy 'rU' mode raises ValueError on Python 3.11+.
            # The with-block closes the file even if reading fails.
            with open(argv[1], 'r') as f:
                lines = f.readlines()
        except IOError:
            # Send the diagnostic to stderr; passing sys.stderr as a
            # positional argument would print the stream object to stdout.
            print("Error: open file", file=sys.stderr)
            sys.exit(1)
    else:
        sys.exit('usage: wc [file]')

    wcs, total = count_word(lines)
    print('Number of words: {0}'.format(total))
    print('Top 20 frequent words:')
    for wc in wcs[:20]:
        print(wc)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment