Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
usesthis statistics
import BeautifulSoup
import urllib2
from collections import defaultdict
def getUsers1(path='main.html'):
    """Collect the user links found in a saved index page.

    Parses the HTML file at ``path`` and, for every ``<a>`` element that
    wraps an ``<img>``, maps the image's ``alt`` text to the anchor's
    ``href``. Anchors lacking an image, an ``alt`` or an ``href`` are
    skipped. Unlike ``getUsers`` this function prints nothing.

    Args:
        path: HTML file to parse; defaults to ``main.html`` in the
            current working directory.

    Returns:
        dict mapping ``alt`` text to ``href``. When several anchors share
        the same ``alt`` text, the last one in document order wins.
    """
    # The context manager closes the file even when parsing raises.
    with open(path, 'r') as f:
        soup = BeautifulSoup.BeautifulSoup(f)

    users = {}
    for link in soup.findAll('a'):
        try:
            users[link.img['alt']] = link['href']
        except (TypeError, KeyError):
            # TypeError: the anchor has no <img> child, so link.img is None.
            # KeyError: the 'alt' or 'href' attribute is missing.
            continue
    return users
def getUsers(path='main.html'):
    """Collect user links from a saved index page and print gallery HTML.

    Parses the HTML file at ``path``. For every ``<a>`` element that wraps
    an ``<img>``, the image's ``alt`` text is mapped to the anchor's
    ``href``, and one line of HTML (a linked thumbnail) is printed to
    stdout. The image ``src`` is prefixed with ``http://usesthis.com``.

    Anchors lacking an image, an ``alt`` or an ``href`` are skipped. An
    anchor whose image has no ``src`` is still recorded in the result but
    produces no output line.

    Args:
        path: HTML file to parse; defaults to ``main.html`` in the
            current working directory.

    Returns:
        dict mapping ``alt`` text to ``href``. When several anchors share
        the same ``alt`` text, the last one in document order wins.
    """
    # The context manager closes the file even when parsing raises.
    with open(path, 'r') as f:
        soup = BeautifulSoup.BeautifulSoup(f)

    users = {}
    for link in soup.findAll('a'):
        # Only the attribute lookups are guarded, so unrelated failures
        # (and Ctrl-C) are no longer silently swallowed.
        try:
            href = link['href']
            alt = link.img['alt']
            users[alt] = href
            imagesrc = 'http://usesthis.com' + link.img['src']
        except (TypeError, KeyError):
            # TypeError: the anchor has no <img> child, so link.img is None.
            # KeyError: a required attribute is missing.
            continue
        print('<a href="' + href + '" title="' + alt + '">'
              + '<img src="' + imagesrc + '" class="user_image"></img></a>')
    return users
def downloadUserPages(users):
    """Download each user's page into the current working directory.

    Every page is saved as ``<name>.html``, where ``<name>`` is the user
    name with all whitespace removed. A total and one progress line per
    user are printed to stdout.

    Args:
        users: dict mapping a user name to the URL of that user's page.

    Raises:
        Whatever ``urllib2.urlopen`` or the file operations raise; a
        failure aborts the remaining downloads.
    """
    print('Total:  %d' % len(users))
    for i, (user, url) in enumerate(users.items()):
        print('processing:  %s %s %s' % (i, user, url))
        filename = ''.join(user.split()) + '.html'

        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        # Binary mode: the body is raw bytes and must be stored untouched
        # (text mode would translate newlines on some platforms).
        with open(filename, 'wb') as fw:
            fw.write(content)
def getUserItems(users):
    """Extract the links found on each user's saved page.

    For every user name in ``users`` the file ``<name>.html`` is parsed
    (``<name>`` is the user name with all whitespace removed, the naming
    used by ``downloadUserPages``) and each ``<a>`` element contributes
    one ``(first child node, href)`` pair.

    Anchors without an ``href`` attribute or without any child node are
    skipped instead of aborting the whole run.

    Args:
        users: dict keyed by user name; the values are not used here.

    Returns:
        ``defaultdict(list)`` mapping user name to a list of
        ``(first_child, href)`` tuples in document order. Users whose
        page yields no usable anchor get no entry.
    """
    user_dict = defaultdict(list)
    for user in users:
        filename = ''.join(user.split()) + '.html'
        # The context manager closes the file even when parsing raises.
        with open(filename, 'r') as f:
            soup = BeautifulSoup.BeautifulSoup(f)

        for link in soup.findAll('a'):
            href = link.get('href')
            if href is None or not link.contents:
                # e.g. <a name="..."> anchors or empty <a></a> elements.
                continue
            user_dict[user].append((link.contents[0], href))
    return user_dict
def getItemFreq(user_items, min_count=1, max_count=70):
    """Print how often each item is linked across all users.

    Every ``(item, link)`` pair counts once, so an item linked twice by
    the same user is counted twice. Items are printed in descending order
    of count, ties broken by descending item name, one per line in the
    form ``['<a href="LINK">ITEM</a>', COUNT],``. The link shown for an
    item is the last one encountered for it.

    Args:
        user_items: dict mapping a user to an iterable of
            ``(item, link)`` string pairs.
        min_count: exclusive lower bound; only items with a count
            strictly greater than this are printed.
        max_count: exclusive upper bound; only items with a count
            strictly less than this are printed.

    Returns:
        None. The result is written to stdout.
    """
    item_link_dict = {}
    item_freq_dict = defaultdict(int)
    for items in user_items.values():
        for item, link in items:
            item_freq_dict[item] += 1
            item_link_dict[item] = link

    # Sort on (count, name) so equal counts have a deterministic order.
    ranked = sorted(
        item_freq_dict.items(),
        key=lambda pair: (pair[1], pair[0]),
        reverse=True,
    )
    for key, value in ranked:
        # Filter first so no markup is built for rows that are dropped.
        if not (min_count < value < max_count):
            continue
        anchor = '<a href="' + item_link_dict[key] + '">' + key + '</a>'
        print("['%s', %s]," % (anchor, value))
if __name__ == '__main__':
    # Script entry point. Only the first stage is enabled: it parses the
    # saved index page and prints the thumbnail markup for each user.
    users = getUsers()
    # Remaining stages, listed in pipeline order and enabled by hand:
    #downloadUserPages(users)
    #user_items = getUserItems(users)
    #getItemFreq(user_items)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment