Skip to content

Instantly share code, notes, and snippets.

@michaeldorner
Created January 27, 2022 08:36
Show Gist options
  • Save michaeldorner/ca2f8e0b56c89012f7ae1007645d73e5 to your computer and use it in GitHub Desktop.
Save michaeldorner/ca2f8e0b56c89012f7ae1007645d73e5 to your computer and use it in GitHub Desktop.
Collects all pulls and related information from GitHub
import argparse
import json
from tqdm.auto import tqdm
from agithub.GitHub import GitHub
def get_pulls(client, owner, repository):
    """Return the pull requests of ``owner/repository`` in every state.

    Args:
        client: Object whose ``repos[owner][repository].pulls.get(state=...)``
            call returns a ``(status, body)`` pair. The script passes an
            ``agithub`` ``GitHub`` instance.
        owner: Value used to index ``client.repos``.
        repository: Value used to index ``client.repos[owner]``.

    Returns:
        The body returned by the request, unchanged.

    Raises:
        Exception: If the returned status is not 200; the status is the
            exception's only argument.
    """
    http_status, response = client.repos[owner][repository].pulls.get(state='all')
    if http_status == 200:
        return response
    raise Exception(http_status)
def get_commits(client, owner, repository, pulls):
    """Fetch the commits of every pull request in ``pulls``.

    Args:
        client: Object whose
            ``repos[owner][repository].pulls[number].commits.get()`` call
            returns a ``(status, body)`` pair.
        owner: Value used to index ``client.repos``.
        repository: Value used to index ``client.repos[owner]``.
        pulls: Iterable of mappings that each provide a ``'number'`` key.

    Returns:
        A dict mapping each pull's ``'number'`` to the body returned for it.

    Raises:
        Exception: On the first request whose status is not 200; the status
            is the exception's only argument. Results collected so far are
            discarded.
    """
    commits_by_pr = {}
    for pr in tqdm(pulls, desc='Commits'):
        number = pr['number']
        # The request path is rebuilt from `client` on every iteration.
        # NOTE(review): agithub request builders appear to be stateful, so a
        # partially built path should not be cached outside the loop - confirm.
        code, body = client.repos[owner][repository].pulls[number].commits.get()
        if code != 200:
            raise Exception(code)
        commits_by_pr[number] = body
    return commits_by_pr
def get_timelines(client, owner, repository, pulls):
    """Fetch the issue timeline of every pull request in ``pulls``.

    Args:
        client: Object whose
            ``repos[owner][repository].issues[number].timeline.get()`` call
            returns a ``(status, body)`` pair.
        owner: Value used to index ``client.repos``.
        repository: Value used to index ``client.repos[owner]``.
        pulls: Iterable of mappings that each provide a ``'number'`` key.

    Returns:
        A dict mapping each pull's ``'number'`` to the body returned for it.

    Raises:
        Exception: On the first request whose status is not 200; the status
            is the exception's only argument. Results collected so far are
            discarded.
    """
    timeline_by_pr = {}
    for pr in tqdm(pulls, desc='timelines'):
        number = pr['number']
        # The timeline is requested through the `issues` path, using the
        # pull's number as the issue number. The path is rebuilt from
        # `client` on every iteration.
        code, events = client.repos[owner][repository].issues[number].timeline.get()
        if code != 200:
            raise Exception(code)
        timeline_by_pr[number] = events
    return timeline_by_pr
if __name__ == '__main__':
    # Script entry point: parse the command line, obtain the list of pull
    # requests (from a cache file or from GitHub), then optionally download
    # the commits and the timeline of every pull request. Each result is
    # written to its own JSON file.
    parser = argparse.ArgumentParser(
        description='Collects all pulls and related information from GitHub')
    parser.add_argument('owner', type=str, help='owner of the repository')
    parser.add_argument('repository', type=str, help='name of the repository')
    # NOTE(review): a token given on the command line ends up in the shell
    # history and in process listings; consider reading it from the
    # environment instead. Kept positional so existing invocations still work.
    parser.add_argument('token', type=str, help='GitHub API token')
    parser.add_argument('--pulls', '-p', dest='pulls_file',
                        default='./pulls.json',
                        help='file the downloaded pulls are written to '
                             '(default: %(default)s)')
    parser.add_argument('--cached_pulls', '-cp',
                        dest='cached_pulls_file', default=None,
                        help='read the pulls from this JSON file instead of '
                             'downloading them')
    parser.add_argument('--commits', '-c', dest='commits_file', default=None,
                        help='if given, download the commits of every pull '
                             'and write them to this file')
    parser.add_argument('--timeline', '-t', dest='timeline_file', default=None,
                        help='if given, download the timeline of every pull '
                             'and write it to this file')
    args = parser.parse_args()

    client = GitHub(token=args.token, paginate=True)
    owner = args.owner
    repository = args.repository

    if args.cached_pulls_file:
        # Reuse an earlier download; the pulls file is not rewritten here.
        print('Load pulls from cache file {}...'.format(args.cached_pulls_file))
        with open(args.cached_pulls_file, 'r', encoding='utf-8') as json_file:
            pulls = json.load(json_file)
    else:
        print('Load pulls from {0}/{1}...'.format(owner, repository))
        pulls = get_pulls(client, owner, repository)
        with open(args.pulls_file, 'w', encoding='utf-8') as json_file:
            json.dump(pulls, json_file)

    if args.commits_file:
        print('Load commits from {0}/{1}...'.format(owner, repository))
        commits = get_commits(client, owner, repository, pulls)
        # JSON object keys are always strings, so the per-pull keys of the
        # dict are written as strings.
        with open(args.commits_file, 'w', encoding='utf-8') as json_file:
            json.dump(commits, json_file)

    if args.timeline_file:
        print('Load timelines from {0}/{1}...'.format(owner, repository))
        timelines = get_timelines(client, owner, repository, pulls)
        with open(args.timeline_file, 'w', encoding='utf-8') as json_file:
            json.dump(timelines, json_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment