Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
from bs4 import BeautifulSoup as bs
from urllib2 import urlopen
from time import sleep
# Page ids to scrape; each id is appended to base_url to form one page URL.
gen_urls = list(range(80000, 80002))
# URL prefix of the target site. It is blank in this snippet and has to be
# filled in before running: urlopen() needs a full URL, not a bare page id.
base_url = ''
enlist_urls = [base_url + str(item) for item in gen_urls]

# Maps page URL -> text of the <h2 class="taglist"> heading found on that page.
DATA = {}
for current_page in enlist_urls:
    response = urlopen(current_page)
    try:
        html = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    # Parse only after the download: the soup is built from this page's
    # markup, so it has to be rebuilt on every iteration.
    soup = bs(html, "lxml")
    for item in soup.findAll('h2', attrs={'class': 'taglist'}):
        # One entry per page: if several headings match, the last one wins.
        DATA[current_page] = item.text

if __name__ == '__main__':
    # To list by heading text instead, iterate over
    # sorted(DATA.items(), key=lambda x: x[1], reverse=True).
    for page_url, heading in DATA.items():
        # A single formatted string prints the same "url text" line under
        # both the Python 2 print statement and the print() function.
        print("%s %s" % (page_url, heading))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.