Skip to content

Instantly share code, notes, and snippets.

@russelnickson
Created January 17, 2010 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save russelnickson/279597 to your computer and use it in GitHub Desktop.
Save russelnickson/279597 to your computer and use it in GitHub Desktop.
Hyper-Linked Tag Cloud
# PROGRAM TO GENERATE HYPER-LINKED TAG CLOUD #
# Run at the command line with up to three parameters:
#   1. Source file
#   2. Noise file (words to ignore)
#   3. Number of tags to generate (optional, defaults to 20)
import string,sys,os,webbrowser
# --- Read the inputs and count word frequencies ---------------------------
# Both file arguments are mandatory; stop with a usage line instead of an
# IndexError traceback when they are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s SOURCE_FILE NOISE_FILE [TAG_COUNT]" % sys.argv[0])

# Context managers close both files even if reading fails.
with open(sys.argv[1]) as f1:
    content = f1.read().lower()
with open(sys.argv[2]) as f2:
    noisecontent = f2.read().lower()

# The tag count is optional and defaults to 20.
try:
    count = int(sys.argv[3])
except IndexError:
    count = 20
except ValueError:
    sys.exit("TAG_COUNT must be an integer, got %r" % sys.argv[3])
# A negative count would make the later slices and ranges disagree.
count = max(count, 0)

# Split the noise text into whole words (whitespace-, comma- or
# semicolon-separated).  Testing membership against the raw noise string
# would be a substring test and would also discard any word that merely
# occurs inside a noise word.
noise_words = set(
    token.strip(string.punctuation)
    for token in noisecontent.replace(',', ' ').replace(';', ' ').split()
)

# Words of the source text with surrounding punctuation removed.
workinglist = content.split()
cleanlist = [item.strip(string.punctuation) for item in workinglist]

# freq maps each remaining word to its number of occurrences.
freq = {}
for item in cleanlist:
    # Tokens made only of punctuation strip down to '' and are skipped.
    if not item or item in noise_words:
        continue
    freq[item] = freq.get(item, 0) + 1
def most_common(h):
    """Return the (value, key) pairs of mapping *h* as a list, largest first.

    The pairs are ordered by descending tuple comparison, so entries with
    the same value are ordered by descending key.
    """
    return sorted(((tally, term) for term, tally in h.items()), reverse=True)
# --- Report the top words and write the hyper-linked tag cloud ------------
# From here on freq is a list of (frequency, word) pairs, most frequent first.
freq = most_common(freq)
# Slicing never over-runs, so a text with fewer distinct words than `count`
# simply produces fewer tags instead of raising IndexError.
top_words = freq[0:count]

# Single-string print calls behave the same with the print statement and
# the print function.
print('\nWORD' + '\t\t ' + 'FREQUENCY')
for freque, word in top_words:
    print("%-15s \t%s" % (word, freque))

cwd = os.getcwd()
# Rough sentence split: everything between two full stops.
workinglist = content.split('.')

# One page per tag, listing every sentence that contains the word with the
# word highlighted in red.
# NOTE: this is a substring match, so a short word also matches inside
# longer words.
for index, (freque, word) in enumerate(top_words):
    link = os.path.join(cwd, 'hyper-linked_tagcloud' + str(index) + '.html')
    highlighted = '<font color="#ff0000">' + word + '</font>'
    with open(link, 'w') as f:
        f.write('<html><body>')
        for sentence in workinglist:
            if word in sentence:
                f.write(sentence.replace(word, highlighted) + '\n<br/>')
        f.write('</body></html>')

# The index page: every tag links to its own page, with a font size
# proportional to its frequency relative to the most frequent word.
link = os.path.join(cwd, 'hyper-linked_tagcloud.html')
with open(link, 'w') as f:
    f.write('<html><body>')
    for index, (freque, word) in enumerate(top_words):
        # Floor division keeps the size an integer on every Python version.
        # freq[0] exists whenever this loop body runs.
        size = (freque * 10) // freq[0][0]
        # Relative href: all pages are written to the same directory.
        f.write('<font face="Times New Roman" size="%d" color="#23bb07">'
                '<a href="hyper-linked_tagcloud%d.html">%s</a>'
                '\t\t\t</font>' % (size, index, word))
    f.write('</body></html>')

# Prefer Internet Explorer at its usual location.  The path is quoted
# because webbrowser.get() splits the command line shell-style and the path
# contains spaces.  If that launch fails, use the system default browser.
iecommand = '"c:/program files/internet explorer/iexplore.exe" %s &'
ie = webbrowser.get(iecommand)
if not ie.open(link):
    webbrowser.open(link)
# russel nickson
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment