Skip to content

Instantly share code, notes, and snippets.

@SiddharthSudhakar
Last active March 14, 2017 07:33
Show Gist options
  • Save SiddharthSudhakar/50dbe606ebed22605122 to your computer and use it in GitHub Desktop.
Save SiddharthSudhakar/50dbe606ebed22605122 to your computer and use it in GitHub Desktop.
A web crawler to extract the data on ranking and display rank, name and profile details. This project is solely for learning purposes
__author__ = 'Siddharth'
import requests
from bs4 import BeautifulSoup
def ranking_list(max_pages):
    """Crawl thenewboston.com's reputation-sorted user search results and
    print each user's rank, display name, and profile URL.

    Args:
        max_pages: number of result pages to crawl (pages 1 through
            max_pages inclusive).

    Returns:
        None. Output is printed to stdout, one rank/name/link triple per
        user, separated by blank lines.
    """
    page = 1
    while page <= max_pages:
        url = ("https://www.thenewboston.com/search.php"
               "?type=0&sort=reputation&page=" + str(page))
        # Timeout so a stalled server cannot hang the crawler forever.
        response = requests.get(url, timeout=10)
        # Name the parser explicitly: a bare BeautifulSoup(text) emits a
        # warning and may pick different parsers on different machines.
        soup = BeautifulSoup(response.text, "html.parser")
        # Pair each profile link with its ranking badge; zip() stops at
        # the shorter sequence, so a page with mismatched counts is safe.
        for link, rank in zip(soup.findAll('a', {'class': 'desc-title'}),
                              soup.findAll('div', {'class': 'ranking'})):
            profile_rank = rank.string.strip()
            profile_link = "https://www.thenewboston.com" + link.get('href')
            profile_name = link.string
            print(profile_rank)
            print(profile_name)
            print(profile_link)
            # Original used Python 2 `print ""`; print("") works on both.
            print("")
        page += 1
# Guard the crawl behind __main__ so importing this module does not fire
# network requests as a side effect.
if __name__ == "__main__":
    # Display 5 pages of ranking content.
    ranking_list(5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment