Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Tags-from-joyreactor.py
from bs4 import BeautifulSoup as bs
from urllib2 import urlopen
from time import sleep
# Download a fixed range of joyreactor.cc "new" pages, pull the text of the
# <h2 class="taglist"> headings out of each one, and print one
# "<page url> <heading text>" line per page when run as a script.

# Page numbers to fetch; the end of the range is exclusive.
gen_urls = list(range(80000, 80002))
base_url = 'http://joyreactor.cc/new/'
enlist_urls = [base_url + str(item) for item in gen_urls]

# Maps each page URL to the text of a tag-list heading found on that page.
DATA = {}
for current_page in enlist_urls:
    response = urlopen(current_page)
    try:
        html = response.read()
    finally:
        # Close the response even when the read fails.
        response.close()
    # Parse the page that was just downloaded. This has to happen inside the
    # loop, after `html` is assigned: parsing once before the loop raised
    # NameError and would have re-scanned the same document for every URL.
    soup = bs(html, "lxml")
    for item in soup.findAll('h2', attrs={'class': 'taglist'}):
        # NOTE(review): every match is stored under the same key, so only the
        # last heading on a page is kept. Confirm this is intended before
        # switching the value to a list of all headings.
        DATA[current_page] = item.text
    # Wait one second before requesting the next page.
    sleep(1)

if __name__ == '__main__':
    pairs = DATA.items()
    # To order the output by heading text instead, iterate over
    # sorted(pairs, key=lambda x: x[1], reverse=True).
    for page_url, tag_text in pairs:
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (page_url, tag_text))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.