Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@myusuf3
Created June 13, 2013 05:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save myusuf3/a2016266f22a2e3a8b6c to your computer and use it in GitHub Desktop.
Save myusuf3/a2016266f22a2e3a8b6c to your computer and use it in GitHub Desktop.
A little code to scrape GitHub's explore page.
import requests
from BeautifulSoup import BeautifulSoup
# Page that is fetched and scraped for trending repositories.
# HTTPS is used so the request is not sent in plaintext.
GITHUB_EXPLORE_PAGE = 'https://github.com/explore'
def get_html_explore():
    """Download the page at ``GITHUB_EXPLORE_PAGE`` and return its body.

    Returns:
        The raw response body (``response.content``) of the GET request.

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            request times out, or the server answers with a 4xx/5xx status.
    """
    # Bound the wait so a stalled connection cannot hang the script forever.
    response = requests.get(GITHUB_EXPLORE_PAGE, timeout=30)
    # Fail loudly here rather than handing an error page to the HTML parser.
    response.raise_for_status()
    return response.content
def parse_html(content):
    """Pull the trending-repository list items out of explore-page HTML.

    Args:
        content: HTML markup to parse.

    Returns:
        The ``<li>`` elements found inside the element
        ``<div id="trending-repositories">``, or an empty list when the
        markup contains no such div.
    """
    soup = BeautifulSoup(content)
    trending_repo_div = soup.find('div', {'id': 'trending-repositories'})
    if trending_repo_div is None:
        # The expected container is absent (different page or changed
        # layout); report "no repositories" instead of failing on None.
        return []
    return trending_repo_div.findAll('li')
def repo_breakdown(repo):
    """Summarise one repository list item as a plain dict.

    The item is expected to contain an ``<h3>`` holding exactly two links
    (read as user, then project) and a ``<ul>`` holding exactly two links
    (read as stars, then forks).

    Args:
        repo: A parsed element exposing ``find``; the elements it returns
            must expose ``findAll``, and the links a ``text`` attribute.

    Returns:
        A dict with the keys ``'user'``, ``'project'``, ``'stars'`` and
        ``'forks'``, each mapped to the ``text`` of the matching link.

    Raises:
        ValueError: If the ``<h3>`` or ``<ul>`` is missing, or either one
            does not contain exactly two links.
    """
    header = repo.find('h3')
    ul = repo.find('ul')
    if header is None or ul is None:
        raise ValueError('repository entry is missing its <h3> or <ul> element')

    header_links = header.findAll('a')
    stat_links = ul.findAll('a')
    # Check the counts up front so malformed markup produces one clear error
    # instead of a bare tuple-unpacking failure.
    if len(header_links) != 2 or len(stat_links) != 2:
        raise ValueError(
            'repository entry must have two links in <h3> and two in <ul>, '
            'found %d and %d' % (len(header_links), len(stat_links))
        )

    user, project = header_links
    stars, forks = stat_links
    return {
        'user': user.text,
        'project': project.text,
        'stars': stars.text,
        'forks': forks.text,
    }
def get_data(repos):
    """Build the list of repository summaries from parsed list items.

    Items for which ``repo.get('class')`` is truthy are skipped; every other
    item is converted with ``repo_breakdown``.

    Args:
        repos: Iterable of parsed list items, each exposing ``get``.

    Returns:
        A list with one ``repo_breakdown`` result per retained item, in
        input order.
    """
    return [repo_breakdown(repo) for repo in repos if not repo.get('class')]
def main():
    """Fetch the explore page, parse it, and print the repository summaries."""
    content = get_html_explore()
    repos = parse_html(content)
    # The parenthesised single-argument form prints the same thing under
    # Python 2's print statement and Python 3's print function.
    print(get_data(repos))
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment