Skip to content

Instantly share code, notes, and snippets.

@eiszfuchs
Last active August 29, 2015 14:07
Show Gist options
  • Save eiszfuchs/e10180a26fd5a19b3e3c to your computer and use it in GitHub Desktop.
Save eiszfuchs/e10180a26fd5a19b3e3c to your computer and use it in GitHub Desktop.
import re
from collections import Counter

import requests
from prettytable import PrettyTable
from pyquery import PyQuery as pq
# Page whose text is fetched and analysed when the script is run.
url = "http://www.apple.com/"
# Runs of non-word characters act as the separator between words.
p = re.compile(r'\W+')


def count_words(text):
    """Return a Counter mapping each word in *text* to its occurrence count.

    The text is split on runs of non-word characters (pattern ``p``).
    Empty fragments, which ``re.split`` yields when the text starts or
    ends with a separator, are discarded so that the empty string is
    never reported as a word. Counting is case-sensitive.
    """
    return Counter(word for word in p.split(text) if word)


def build_table(stats, min_count=2):
    """Build a PrettyTable with one row per word seen at least *min_count* times.

    *stats* is a mapping of word -> count. The default threshold of 2
    hides words that occur only once.
    """
    table = PrettyTable(["Word", "Count"])
    table.align["Word"] = "l"
    # ``items()`` works on Python 2 and 3; ``iteritems()`` was Python 2 only.
    for word, count in stats.items():
        if count >= min_count:
            table.add_row([word, count])
    return table


def main():
    """Fetch ``url``, count the words in its text and print a frequency table."""
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    document = pq(response.text)
    # Remove script and style elements so their contents are not counted.
    document.find("script, style").remove()
    stats = count_words(document.text())
    print(url)
    print(build_table(stats).get_string(sortby="Count", reversesort=True))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment