Skip to content

Instantly share code, notes, and snippets.

@russelnickson
Created January 17, 2010 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save russelnickson/279462 to your computer and use it in GitHub Desktop.
Save russelnickson/279462 to your computer and use it in GitHub Desktop.
Generate a Tag Cloud
import string,sys
# Usage: <script> TEXT_FILE NOISE_FILE [COUNT]
#   sys.argv[1] - text file whose words are counted
#   sys.argv[2] - file listing the "noise" words to leave out of the count
#   sys.argv[3] - optional number of top words to report (defaults to 20)

# Read both inputs up front; the with-blocks close the files even on error.
with open(sys.argv[1]) as f1:
    content = f1.read().lower()
with open(sys.argv[2]) as f2:
    noisecontent = f2.read().lower()

try:
    count = int(sys.argv[3])
except IndexError:
    # No third argument given: fall back to the default.
    count = 20

# Split on whitespace and trim punctuation from both ends of every token.
cleanlist = [item.strip(string.punctuation) for item in content.split()]

# Compare whole words against the noise list. Testing `item in noisecontent`
# on the raw string is a substring test, which would also discard any word
# that merely occurs inside a noise word (e.g. "her" inside "there").
noisewords = set(
    word.strip(string.punctuation) for word in noisecontent.split()
)

# Map each remaining word to its number of occurrences.
freq = {}
for item in cleanlist:
    # Tokens made only of punctuation are empty after stripping; skip them.
    if not item or item in noisewords:
        continue
    freq[item] = freq.get(item, 0) + 1

print('\nWORD' + ' \t ' + 'FREQUENCY')
def most_common(h):
    """Return the items of mapping *h* ordered from highest value to lowest.

    Args:
        h: A dict-like object providing ``items()``; here it maps words to
            their occurrence counts.

    Returns:
        A new list of ``(value, key)`` tuples sorted in descending order.
        Entries with equal values are ordered by key, also descending,
        because whole tuples are compared.
    """
    return sorted(((value, key) for key, value in h.items()), reverse=True)
# From here on `freq` is a list of (count, word) tuples, most frequent first.
freq = most_common(freq)
top_entries = freq[0:count]

# Console report of the selected words.
for freque, word in top_entries:
    print('%s  \t%s' % (word, freque))

# The highest count is the reference for scaling the font sizes.
# The fallback only avoids indexing an empty list; no word is written then.
peak = freq[0][0] if freq else 1

# Write the tag cloud; the with-block closes the file even if a write fails.
with open('c:/tiny_projects/tagcloud.html', 'w') as f:
    f.write('<html><body>')
    for freque, word in top_entries:
        # Floor division keeps the size an integer from 0 to 10 on any
        # Python version; the most frequent word always gets 10.
        size = (freque * 10) // peak
        # Words come straight from the input text, so neutralise characters
        # that would otherwise be interpreted as markup.
        safe_word = (word.replace('&', '&amp;')
                         .replace('<', '&lt;')
                         .replace('>', '&gt;'))
        f.write('<font face="Times New Roman" size="' + str(size)
                + '" color="#23bb07">' + safe_word + '\t\t\t</font>')
    f.write('</body></html>')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment