Skip to content

Instantly share code, notes, and snippets.

@cyrexcyborg
Created July 28, 2014 06:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cyrexcyborg/b3c5d06b96a2a3ead99a to your computer and use it in GitHub Desktop.
Save cyrexcyborg/b3c5d06b96a2a3ead99a to your computer and use it in GitHub Desktop.
Tags-from-joyreactor.py
from bs4 import BeautifulSoup as bs
from urllib2 import urlopen
from time import sleep
# Scrape the tag-list headings from a small range of joyreactor.cc "new" pages
# and collect them in DATA, keyed by page URL.
#
# NOTE: the fetch loop runs at module level (i.e. also on import), as in the
# original script; only the printing is guarded by __name__ == '__main__'.

base_url = 'http://joyreactor.cc/new/'
# Page numbers to fetch; range's upper bound is exclusive: 80000 and 80001.
gen_urls = list(range(80000, 80002))
enlist_urls = [base_url + str(item) for item in gen_urls]

# Maps page URL -> text of an <h2 class="taglist"> element found on that page.
DATA = {}

for current_page in enlist_urls:
    response = urlopen(current_page)
    try:
        html = response.read()
    finally:
        # Closed explicitly rather than via `with`: Python 2 urllib2
        # responses do not support the context-manager protocol.
        response.close()

    # Parse each page right after downloading it. Previously the soup was
    # built once, before the loop and before `html` was ever assigned, which
    # raised NameError and would never have reflected the fetched pages.
    soup = bs(html, "lxml")

    for item in soup.findAll('h2', attrs={'class': 'taglist'}):
        # NOTE(review): every match writes to the same key, so only the last
        # taglist on a page survives -- confirm whether all should be kept.
        DATA[current_page] = item.text

    # Pause between consecutive page requests.
    sleep(1)

if __name__ == '__main__':
    for page_url, tags in DATA.items():
        # Single-argument print() form is valid in both Python 2 and 3.
        print("%s %s" % (page_url, tags))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment