Skip to content

Instantly share code, notes, and snippets.

@lamchau
Last active January 10, 2022 08:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lamchau/14737aec9efde1bc7e89686696952b39 to your computer and use it in GitHub Desktop.
Save lamchau/14737aec9efde1bc7e89686696952b39 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import datetime
import json
import logging
import os
import re
import sys
from types import SimpleNamespace
from typing import Dict, List, Optional

import requests
# Page size requested from the search API (sent as the `first:` argument).
MAX_RESULTS = 100

# Every log record is mirrored to two sinks: a debug.log file in the current
# working directory and the process's standard output.
file_handler = logging.FileHandler('debug.log', encoding='utf-8')
stdout_handler = logging.StreamHandler(stream=sys.stdout)

logging.basicConfig(
    level=logging.DEBUG,
    handlers=[file_handler, stdout_handler],
    # Timestamp with millisecond precision, then severity, then the message.
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s.%(msecs)03d %(levelname)s: %(message)s',
)
# Uncomment to drop urllib3 log records below CRITICAL.
# logging.getLogger('urllib3').setLevel(logging.CRITICAL)
def get_query(author: str, created_at: Optional[str] = None, end_cursor: Optional[str] = None) -> Dict:
    """Build the GraphQL request body that searches one page of an author's pull requests.

    The search is always scoped to ``org:squareup`` and asks for up to
    ``MAX_RESULTS`` nodes per page.

    Args:
        author: GitHub login used in the ``author:`` search qualifier.
        created_at: When truthy, adds the qualifier ``created:<created_at>..*``.
        end_cursor: When truthy, passed as the ``after:`` argument to request
            the page following that cursor.

    Returns:
        A dict with a single ``query`` key holding the rendered GraphQL document.
    """
    date_query = f'created:{created_at}..*' if created_at else ''
    search_query = f'author:{author} org:squareup {date_query}'
    params = {
        # json.dumps renders a double-quoted literal with `"`, `\` and control
        # characters escaped, so a stray quote in the input cannot terminate
        # the GraphQL string early. For ordinary input the output is identical
        # to plain "..." interpolation.
        'query': json.dumps(search_query, ensure_ascii=False),
        'results': MAX_RESULTS,
        # hack: conditionally build GQL query to avoid using external packages
        'end_cursor': f', after: {json.dumps(end_cursor, ensure_ascii=False)}' if end_cursor else '',
    }
    # https://docs.github.com/en/graphql/overview/explorer
    # %-formatting is used on purpose: the document is full of literal braces,
    # which would all need doubling with str.format or an f-string.
    gql = '''
{
search(query: %(query)s, type: ISSUE, first: %(results)d%(end_cursor)s) {
pageInfo {
hasNextPage
endCursor
}
nodes {
... on PullRequest {
headRefName
title
body
repository {
name
}
url
createdAt
closedAt
merged
additions
deletions
}
}
}
}''' % params
    logging.debug(f'GQL: {gql}')
    return {
        'query': gql
    }
# Command-line interface: whose pull requests to download and the credentials
# used to authenticate against the GitHub API.
parser = argparse.ArgumentParser(description='Download all pull requests')
required = parser.add_argument_group(title='Required')
required.add_argument('--author', required=True, help='the target github author/username')
required.add_argument('--username', required=True, help='[auth] github: username')
required.add_argument('--token', required=True, help='[auth] github: personal access token')
# --created-at may be omitted, so it gets its own help section instead of
# being listed under "Required".
optional = parser.add_argument_group(title='Optional')
optional.add_argument(
    '--created-at',
    required=False,
    help='only include pull requests created on or after this date (e.g. 2021-01-31)',
)
# TODO: add name resolver https://registry.sqprod.co/api/v2/github_identities
# Matches ticket keys such as ABC-123. Compiled once because it is applied to
# three fields of every pull request.
_JIRA_KEY_PATTERN = re.compile(r'[A-Z]{2,}-\d+')
_GRAPHQL_URL = 'https://api.github.com/graphql'
# requests waits indefinitely by default; bound each call so an unresponsive
# server cannot hang the script.
_REQUEST_TIMEOUT_SECONDS = 60


def _extract_jira_keys(*texts: Optional[str]) -> List[str]:
    """Return the sorted, de-duplicated ticket keys found in *texts*.

    Empty or missing texts are skipped, and keys starting with ``COVID``
    (e.g. ``COVID-19``) are dropped.
    """
    keys = set()
    for text in texts:
        if text:
            keys.update(_JIRA_KEY_PATTERN.findall(text))
    return sorted(key for key in keys if not key.startswith('COVID'))


def _annotate_pull_request(pull_request: SimpleNamespace) -> None:
    """Reshape one search node in place into the record that gets saved.

    Adds a ``jira`` list built from the branch name, description and title,
    flattens ``repository`` to its name, and removes the ``headRefName`` and
    ``body`` attributes.
    """
    pull_request.jira = _extract_jira_keys(
        pull_request.headRefName,  # branch name
        pull_request.body,  # description
        pull_request.title,
    )
    pull_request.repository = pull_request.repository.name
    del pull_request.headRefName
    del pull_request.body


def _fetch_pull_requests(session, author: str, created_at: Optional[str] = None) -> List[SimpleNamespace]:
    """Page through the search results and return every annotated pull request.

    Args:
        session: Authenticated ``requests.Session`` used for the POST calls.
        author: GitHub login whose pull requests are fetched.
        created_at: Optional creation-date filter forwarded to ``get_query``.

    Raises:
        RuntimeError: If a response has a non-200 status or carries no ``data``.
    """
    issues: List[SimpleNamespace] = []
    end_cursor = None
    has_next_page = True
    while has_next_page:
        if issues:
            logging.info(f'Collected {len(issues)} pull requests for {author}')
        response = session.post(
            url=_GRAPHQL_URL,
            json=get_query(author=author, created_at=created_at, end_cursor=end_cursor),
            headers={'Accept': 'application/vnd.github.v3+json'},
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            error_message = f'Query failed {response.status_code}'
            logging.error(error_message)
            raise RuntimeError(error_message)

        # hack: recursive SimpleNamespace for easier retrieval of attrs
        payload = response.json(object_hook=lambda item: SimpleNamespace(**item))

        # A GraphQL failure can arrive with HTTP 200 and an `errors` list.
        # Surface it instead of failing later with an opaque AttributeError.
        for error in getattr(payload, 'errors', None) or []:
            logging.error('GraphQL error: %s', getattr(error, 'message', error))
        data = getattr(payload, 'data', None)
        if data is None:
            error_message = 'Query returned no data'
            logging.error(error_message)
            raise RuntimeError(error_message)

        page_info = data.search.pageInfo
        has_next_page = page_info.hasNextPage
        end_cursor = page_info.endCursor
        logging.debug(f'Page Info: {page_info}')

        # Nodes without a `repository` attribute did not match the
        # `... on PullRequest` fragment and are skipped.
        pull_requests = [node for node in data.search.nodes if hasattr(node, 'repository')]
        logging.debug(f'Extracting JIRA issues from {len(pull_requests)} issue(s)')
        for pull_request in pull_requests:
            _annotate_pull_request(pull_request)
        issues.extend(pull_requests)
    return issues


def main() -> None:
    """Download an author's pull requests and save them as JSON.

    The result is written to ``pull-requests/<author>.<YYYYMMDD>.json``
    relative to the current working directory. Nothing is written when no
    pull requests are found.
    """
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(0)
    # Parse before touching the filesystem so invalid arguments exit without
    # leaving an empty output directory behind.
    args = parser.parse_args()
    author = args.author

    output_dir = os.path.realpath('pull-requests')
    logging.debug(f'Checking directory: {output_dir}')
    if not os.path.isdir(output_dir):
        logging.debug(f'Creating directory: {output_dir}')
        os.makedirs(output_dir, exist_ok=True)

    current_date = datetime.datetime.now().strftime('%Y%m%d')
    filename = os.path.join(output_dir, f'{author}.{current_date}.json')
    logging.debug(f'Output file: {os.path.realpath(filename)}')

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        session.auth = (args.username, args.token)
        issues = _fetch_pull_requests(session, author, args.created_at)

    if issues:
        with open(filename, 'w', encoding='utf-8') as f:
            # SimpleNamespace is not JSON-serializable; dump its attribute dict.
            json.dump(issues, f, ensure_ascii=True, indent=2, default=lambda o: o.__dict__)
        success_message = (f'Saved {len(issues)} pull requests for {author} to {filename}')
        logging.info(success_message)
    else:
        empty_message = f'No issues found for {author}'
        logging.info(empty_message)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment