# GitHub Email ID Crawler
# Gist: devkhan/e0991124fc643361f87ddb313bb312f3 (created September 23, 2016 08:01).
# NOTE: GitHub flagged this file as containing bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. To review, open the file in an
# editor that reveals hidden Unicode characters.
import csv
import io
import os

from github import Github
from github.AuthenticatedUser import AuthenticatedUser
# Authenticated GitHub API client shared by the whole script.
# The credentials come from the environment so that no secret lives in the
# source file; a missing variable raises KeyError naming the variable.
# SECURITY: the token that used to be hard-coded on this line has been
# published and should be revoked.
g = Github(os.environ['GITHUB_USERNAME'], os.environ['GITHUB_TOKEN'])

# open() does not create parent directories, so make sure 'output/' exists.
os.makedirs('output', exist_ok=True)

# Output sinks written by process_user(). UTF-8 is set explicitly because
# profile names and locations are free text and may be non-ASCII, which the
# platform default encoding cannot always represent.
file = open('output/indian_users.csv', 'w', encoding='utf-8')
all_users = open('output/all_users.txt', 'w', encoding='utf-8')
class User(object):
    """Plain record for one GitHub profile, rendered as a CSV row by ``str()``.

    Instances are filled in by assigning the attributes below after
    construction; unset attributes fall back to the class-level defaults.
    """

    name = ''
    email = ''
    location = ''
    username = ''
    # Class-level counter shared by all instances (incremented by the crawler
    # as ``User.count``); it is not part of the CSV row.
    count = 0

    def __str__(self):
        """Return ``username,name,email,location`` as one CSV record.

        Fields are quoted/escaped by the ``csv`` module, so values containing
        commas, quotes or newlines (e.g. the location "Pune, India") stay in
        a single column. ``None`` fields are written as empty columns. The
        returned string has no trailing line terminator.
        """
        buffer = io.StringIO()
        # lineterminator='' so the caller decides how rows are separated.
        row_writer = csv.writer(buffer, lineterminator='')
        row_writer.writerow(
            (self.username, self.name, self.email, self.location)
        )
        return buffer.getvalue()
# Crawl state shared with process_user().
# Logins of users whose profile has already been fetched.
visited = set()
# Logins discovered but not yet fetched (the crawl frontier).
unvisited = set()
# Module-level counter; not referenced by the code visible in this file
# (the crawler counts iterations via User.count instead).
count = 0
# def process_user(user: NamedUser): | |
# print(user) | |
# User.count += 1 | |
# if User.count > 10: | |
# return | |
# if type(user.location) is str and 'India' in user.location: | |
# indian_user = User() | |
# indian_user.location = user.location | |
# indian_user.name = user.name | |
# indian_user.username = user.login | |
# indian_user.email = user.email | |
# visited.append(indian_user) | |
# file.write(str(indian_user) + os.linesep) | |
# for follower in user.get_following(): | |
# process_user(follower) | |
# else: | |
# return | |
def process_user(user: AuthenticatedUser):
    """Crawl the "following" graph starting from *user* and record Indian users.

    The logins *user* follows seed the module-level ``unvisited`` set. Logins
    are then popped one at a time, fetched through the module-level client
    ``g`` and added to ``visited``. A user whose location contains 'India' is
    written as a CSV row to ``file``, and the logins that user follows are
    queued; each newly queued login is also written to ``all_users``.

    Args:
        user: The starting account; only ``get_following()`` is called on it.

    Returns:
        None. Runs until ``unvisited`` is empty.
    """
    # Seed the frontier with everyone the starting user follows.
    # NOTE(review): these seed logins are not written to all_users, matching
    # the original behavior -- confirm whether that is intended.
    for followed in user.get_following():
        unvisited.add(followed.login)

    while unvisited:
        User.count += 1
        # Flush every 1000 iterations so an interrupted crawl keeps its
        # output. The original compared with `is 1000`, an identity test on
        # ints, so the flush did not run as intended.
        if User.count % 1000 == 0:
            file.flush()
            all_users.flush()

        u = g.get_user(login=unvisited.pop())
        visited.add(u.login)

        # location may be None for profiles that do not set one.
        if isinstance(u.location, str) and 'India' in u.location:
            indian_user = User()
            indian_user.location = u.location
            indian_user.name = u.name
            indian_user.username = u.login
            indian_user.email = u.email
            # '\n' rather than os.linesep: the file is opened in text mode,
            # which already translates '\n' to the platform line ending.
            file.write(str(indian_user) + '\n')

            # NOTE(review): indentation was lost in the source. Expanding the
            # crawl only from India-located users mirrors the commented-out
            # earlier version of this function -- confirm against the author's
            # intent.
            for followed in u.get_following():
                login = followed.login
                # Skip logins already fetched or already queued so each one
                # is recorded in all_users once.
                if login in visited or login in unvisited:
                    continue
                unvisited.add(login)
                all_users.write(login + '\n')
# Start the crawl from the authenticated account, but only when run as a
# script, so importing this module does not start a crawl.
if __name__ == '__main__':
    try:
        process_user(g.get_user())
    finally:
        # Close (and thereby flush) the output files even if the crawl is
        # interrupted or an API call raises.
        file.close()
        all_users.close()
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")