Create a gist now

Instantly share code, notes, and snippets.

@myusuf3 /scraping3.py Secret
Created Jul 3, 2013

What would you like to do?
import requests
from StringIO import StringIO
from lxml.html import parse
# XPath templates for one entry of the "trending-repositories" list.
# Each takes a single %d: the 1-based position of the <li> to inspect.

# Text of the first link in the entry's nested <ul> (read as the star count).
STARS = (
    '//*[@id="trending-repositories"]/ol/li[%d]'
    '/ul/li[1]/a/text()'
)
# Text of the second link in the entry's nested <ul> (read as the fork count).
FORKS = (
    '//*[@id="trending-repositories"]/ol/li[%d]'
    '/ul/li[2]/a/text()'
)
# Text of the first link in the entry's <h3> heading (read as the owner).
USER = (
    '//*[@id="trending-repositories"]/ol/li[%d]'
    '/h3/a[1]/text()'
)
# Text of the second link in the entry's <h3> heading (read as the repo name).
REPO = (
    '//*[@id="trending-repositories"]/ol/li[%d]'
    '/h3/a[2]/text()'
)
def get_explore_page(timeout=10):
    """Download the GitHub explore page and return it as a file-like object.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``StringIO`` wrapping the raw response body.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a 4xx/5xx response status.
    """
    response = requests.get('http://www.github.com/explore/', timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # HTML parser, where it would surface later as a confusing lookup error.
    response.raise_for_status()
    return StringIO(response.content)
def get_stars(root, index):
    """Return the star count of the ``index``-th trending entry as an int.

    Args:
        root: Parsed document element exposing an ``xpath`` method.
        index: 1-based position substituted into the ``STARS`` template.

    Raises:
        ValueError: If the joined text is not an integer (for example when
            the XPath matches nothing and the text is empty).
    """
    text_nodes = root.xpath(STARS % index)
    # The matched text fragments are concatenated, then thousands
    # separators are dropped so "1,234" parses as 1234.
    digits = "".join(text_nodes).replace(',', '')
    return int(digits)
def get_forks(root, index):
    """Return the fork count of the ``index``-th trending entry as an int.

    Args:
        root: Parsed document element exposing an ``xpath`` method.
        index: 1-based position substituted into the ``FORKS`` template.

    Raises:
        ValueError: If the joined text is not an integer (for example when
            the XPath matches nothing and the text is empty).
    """
    text_nodes = root.xpath(FORKS % index)
    # Concatenate the matched text fragments and drop thousands separators
    # before converting, so "1,234" parses as 1234.
    digits = "".join(text_nodes).replace(',', '')
    return int(digits)
def get_user(root, index):
    """Return the first text match of the ``USER`` XPath for entry ``index``.

    Args:
        root: Parsed document element exposing an ``xpath`` method.
        index: 1-based position substituted into the ``USER`` template.

    Raises:
        IndexError: If the XPath matches nothing.
    """
    return root.xpath(USER % index)[0]
def get_repo(root, index):
    """Return the first text match of the ``REPO`` XPath for entry ``index``.

    Args:
        root: Parsed document element exposing an ``xpath`` method.
        index: 1-based position substituted into the ``REPO`` template.

    Raises:
        IndexError: If the XPath matches nothing.
    """
    return root.xpath(REPO % index)[0]
def parse_explore(content, count=4):
    """Extract the leading trending repositories from the explore page.

    Args:
        content: Source handed to ``lxml.html.parse``; ``main`` passes the
            file-like object returned by ``get_explore_page``.
        count: How many list entries to read, starting from the first.
            Defaults to 4, the number the original implementation read.

    Returns:
        A list of dicts, one per entry in page order, each with the keys
        ``'user'``, ``'project'``, ``'stars'`` and ``'forks'``.

    Raises:
        IndexError: If a requested entry has no user or repo link.
        ValueError: If a star or fork count cannot be parsed as an integer.
    """
    root = parse(content).getroot()
    # XPath positions are 1-based, so the entries are li[1] .. li[count].
    return [
        {
            'user': get_user(root, index),
            'project': get_repo(root, index),
            'stars': get_stars(root, index),
            'forks': get_forks(root, index),
        }
        for index in range(1, count + 1)
    ]
def main():
    """Fetch the explore page and print the parsed trending repositories."""
    content = get_explore_page()
    # The parenthesised single-argument form gives the same output under the
    # Python 2 print statement and the Python 3 print function; the bare
    # statement form is a syntax error on Python 3.
    print(parse_explore(content))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment