captainpainway/wordcount.py

## wordcount.py
#!/usr/bin/env python

# Clean text of punctuation, convert to lowercase, count the number of instances of each word, then sort by frequency.
# Intended for non-English text.
# ./wordcount.py [input filename] [export filename]

import sys, re, string, argparse
from collections import OrderedDict

from collections import Counter

# Matches any single character that is not a Unicode letter, digit, or
# underscore. Compiled once here rather than on every word.
_NON_WORD = re.compile(r"\W", re.UNICODE)


def _count_words(text):
    """Return a Counter mapping each cleaned word in *text* to its frequency.

    The text is split on whitespace; every token has its non-word characters
    removed and is lower-cased. Tokens that are empty after cleaning (pure
    punctuation such as "--") are skipped so they never show up as a blank
    entry in the result.
    """
    cleaned = (_NON_WORD.sub("", token).lower() for token in text.split())
    return Counter(word for word in cleaned if word)


def _format_counts(counts):
    """Render *counts* as "word, count" lines, most frequent word first.

    Words with equal counts keep the order in which they were first seen.
    """
    return "".join(
        "{}, {}\n".format(word, count) for word, count in counts.most_common()
    )


def main(argv=None):
    """Count the words of the input file and write the frequency list.

    argv -- [input filename, export filename]; defaults to sys.argv[1:].
            Any further arguments are ignored.

    Both files are handled as UTF-8 and are closed before returning, so the
    export file is fully flushed to disk. Exits with a usage message when
    fewer than two filenames are supplied.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: wordcount.py [input filename] [export filename]")
    source_path, export_path = args[0], args[1]

    with open(source_path, "r", encoding="utf-8") as source:
        text = source.read()

    with open(export_path, "w", encoding="utf-8") as export:
        export.write(_format_counts(_count_words(text)))


if __name__ == "__main__":
    main()
	#!/usr/bin/env python

	# Clean text of punctuation, convert to lowercase, count the number of instances of each word, then sort by frequency.
	# Intended for non-English text.
	# ./wordcount.py [input filename] [export filename]

	import sys, re, string, argparse
	from collections import OrderedDict

	from collections import Counter

	# Matches any single character that is not a Unicode letter, digit, or
	# underscore. Compiled once here rather than on every word.
	_NON_WORD = re.compile(r"\W", re.UNICODE)


	def _count_words(text):
	    """Return a Counter mapping each cleaned word in *text* to its frequency.

	    The text is split on whitespace; every token has its non-word characters
	    removed and is lower-cased. Tokens that are empty after cleaning (pure
	    punctuation such as "--") are skipped so they never show up as a blank
	    entry in the result.
	    """
	    cleaned = (_NON_WORD.sub("", token).lower() for token in text.split())
	    return Counter(word for word in cleaned if word)


	def _format_counts(counts):
	    """Render *counts* as "word, count" lines, most frequent word first.

	    Words with equal counts keep the order in which they were first seen.
	    """
	    return "".join(
	        "{}, {}\n".format(word, count) for word, count in counts.most_common()
	    )


	def main(argv=None):
	    """Count the words of the input file and write the frequency list.

	    argv -- [input filename, export filename]; defaults to sys.argv[1:].
	            Any further arguments are ignored.

	    Both files are handled as UTF-8 and are closed before returning, so the
	    export file is fully flushed to disk. Exits with a usage message when
	    fewer than two filenames are supplied.
	    """
	    args = sys.argv[1:] if argv is None else argv
	    if len(args) < 2:
	        sys.exit("usage: wordcount.py [input filename] [export filename]")
	    source_path, export_path = args[0], args[1]

	    with open(source_path, "r", encoding="utf-8") as source:
	        text = source.read()

	    with open(export_path, "w", encoding="utf-8") as export:
	        export.write(_format_counts(_count_words(text)))


	if __name__ == "__main__":
	    main()