Skip to content

Instantly share code, notes, and snippets.

Created March 31, 2013 16:32
Show Gist options
  • Save anonymous/5281189 to your computer and use it in GitHub Desktop.
Save anonymous/5281189 to your computer and use it in GitHub Desktop.
# Word-frequency script: reads the UTF-8 text file ``gs2.txt``, counts how
# often each whitespace-separated word occurs, and writes one "word count"
# line per distinct word to ``output.txt``, most frequent first.
import io
from datetime import datetime
from operator import itemgetter

INPUT_PATH = 'gs2.txt'
OUTPUT_PATH = 'output.txt'


def count_words(words):
    """Return ``[word, count]`` pairs in order of first appearance.

    Args:
        words: iterable of hashable tokens (here: unicode words).

    Returns:
        A list of two-element lists ``[word, count]``, one per distinct
        word, ordered by the position at which each word was first seen.
    """
    counts = {}
    first_seen = []
    for word in words:
        # Dict lookup keeps this linear; the previous list scan per word
        # made the whole pass quadratic in the number of distinct words.
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
            first_seen.append(word)
    # An explicit order list (rather than relying on dict ordering) keeps
    # the result deterministic on every Python version.
    return [[word, counts[word]] for word in first_seen]


def rank_by_count(tallies):
    """Return ``tallies`` sorted by descending count.

    A stable ascending sort followed by a reversal is used on purpose:
    words with equal counts end up in reverse first-appearance order,
    which is the tie order the script has always produced.
    """
    ranked = sorted(tallies, key=itemgetter(1))
    ranked.reverse()
    return ranked


def main():
    """Read the input file, count its words and write the ranked result."""
    # io.open decodes while reading and always closes the file, and it
    # behaves the same on Python 2.7 and Python 3.
    with io.open(INPUT_PATH, encoding='utf-8') as source:
        words = source.read().split()

    print("Starting find at " + str(datetime.now()))
    tallies = count_words(words)
    print("Finished find at " + str(datetime.now()))

    final = rank_by_count(tallies)
    with io.open(OUTPUT_PATH, 'w', encoding='utf-8') as sink:
        for word, count in final:
            sink.write(u'{0} {1}\n'.format(word, count))


if __name__ == '__main__':
    main()
@SVilgelm
Copy link

SVilgelm commented Apr 1, 2013

import re


# Raw string so that ``\W`` reaches the regex engine instead of being
# treated as an (invalid) string escape sequence.
RE_WORDS = re.compile(r'\W+')


def words_count(text):
    """Return the number of words in ``text``.

    A word is a maximal run of ``\\w`` characters; everything else is
    treated as a separator.

    Args:
        text: the string to examine.

    Returns:
        The number of non-empty tokens obtained by splitting ``text`` on
        runs of non-word characters; ``0`` for an empty string.
    """
    # re.split yields empty strings when the text starts or ends with a
    # separator, so each token (not the whole text) must be tested.
    return sum(1 for token in RE_WORDS.split(text) if token)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment