Skip to content

Instantly share code, notes, and snippets.

@ih4cku
Created March 2, 2016 15:50
Show Gist options
  • Save ih4cku/c86730dcd6592e9788b7 to your computer and use it in GitHub Desktop.
Save ih4cku/c86730dcd6592e9788b7 to your computer and use it in GitHub Desktop.
#!/bin/env python
import requests
from bs4 import BeautifulSoup as Soup
import cPickle
def getStarers(soup):
    """Collect the stargazer links listed on one parsed stargazers page.

    Args:
        soup: Parsed page. Only its ``select`` method is used, with the CSS
            selector ``'#repos .follow-list-item'``.

    Returns:
        A list with one string per matched item: the ``href`` of the item's
        ``a`` tag followed by a newline, so the list can be passed straight
        to ``file.writelines``. Items that have no ``a`` tag, or whose ``a``
        tag has no ``href`` attribute, are skipped.
    """
    starers = []
    for item in soup.select('#repos .follow-list-item'):
        anchor = item.a
        # An item without a link yields None here; subscripting it would
        # raise TypeError, so skip it instead of aborting the whole scrape.
        if anchor is None:
            continue
        href = anchor.get('href')
        if href is None:
            continue
        starers.append(href + '\n')
    return starers
def getNextPage(soup):
    """Return the link to the next stargazers page, or None on the last page.

    Args:
        soup: Parsed page. Only its ``select`` method is used, with the CSS
            selector ``'#repos > div.paginate-container > .pagination'``.

    Returns:
        The ``href`` of the first ``a`` element inside the first matched
        pagination element whose ``string`` is exactly ``'Next'``. ``None``
        when no pagination element matches or it holds no such link.
    """
    pagination = soup.select('#repos > div.paginate-container > .pagination')
    if not pagination:
        return None
    for link in pagination[0].select('a'):
        if link.string == 'Next':
            return link['href']
    # No "Next" link found. The original ended in a bare `None` expression
    # (a no-op) and relied on the implicit return; state the result instead.
    return None
def getAllStarers(url, save_file, timeout=30):
    """Walk every stargazers page starting at *url* and save the links found.

    Each page is fetched with ``requests.get``, parsed with the ``'lxml'``
    parser, and passed to ``getStarers``; the walk then continues with the
    URL returned by ``getNextPage`` until that is falsy. One progress line
    (number of links found, page URL) is printed per page.

    Args:
        url: Address of the first stargazers page.
        save_file: Path of the text file that receives the collected links;
            it is opened in ``'w'`` mode, so existing content is replaced.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The list of link strings gathered from all pages, in page order.

    Raises:
        requests.exceptions.RequestException: If a request times out, the
            connection fails, or the server answers with an error status.
            Nothing is written to *save_file* in that case.
    """
    allstarers = []
    while url:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=timeout)
        # An error page (rate limit, missing repo, ...) has no stargazer
        # items; unchecked, it would look like an empty final page and the
        # result would be silently truncated.
        response.raise_for_status()
        soup = Soup(response.content, 'lxml')
        starers = getStarers(soup)
        allstarers.extend(starers)
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement as `print len(starers), url` did.
        print('%d %s' % (len(starers), url))
        url = getNextPage(soup)
    with open(save_file, 'w') as f:
        f.writelines(allstarers)
    return allstarers
# Repositories (all under the same GitHub account) whose stargazers are compared.
repos = ['captcha.irctc', 'captcha', 'reddit.captcha']
STAR_URL = 'https://github.com/arunpatala/%s/stargazers'

# Scrape each repository (saving its own list to "<repo>.txt") and keep a
# running intersection, so the final set holds only users who starred EVERY
# repository. The previous pairwise version overwrote the result on each
# iteration, leaving just the overlap of the last two repositories, and it
# skipped the intersection entirely when the preceding repository had no
# stargazers.
both_starers = None
for r in repos:
    url = STAR_URL % (r,)
    curr_starers = getAllStarers(url, r + '.txt')
    curr_set = set(curr_starers)
    if both_starers is None:
        both_starers = curr_set
    else:
        both_starers = both_starers & curr_set

if both_starers is None:
    # `repos` was empty: nothing was scraped, so there is nothing in common.
    both_starers = set()

with open('both_starers.txt', 'w') as f:
    # Sorted so the output file is stable between runs (set order is not).
    f.writelines(sorted(both_starers))
print('done.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment