Forked from unbracketed/export_repo_issues_to_csv.py
Last active
March 2, 2016 18:21
-
-
Save jfischer/74444b3a247a3c0b9004 to your computer and use it in GitHub Desktop.
Export Issues from Github repo to CSV (API v3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Exports Issues from a specified repository to a CSV and/or JSON file.

Uses basic authentication (Github username + password) to retrieve Issues
from a repository that username has access to. If a public repository,
no user/password needs to be provided. Supports Github API v3.

Originally based on a script by Brian Luft at https://gist.github.com/unbracketed/3380407

This requires installing the 'requests' package (https://pypi.python.org/pypi/requests/).

2/2016 Jeff Fischer made the following changes:
 * Fixed issues with pagination
 * Removed hard-coded label filtering
 * Added sorting
 * Added JSON output
 * Added command line argument processing and interactive querying for repo/user/password.

USAGE
-----
export_github_issues.py [-h] [--repo REPO] [--user USER]
                        [--public] [--desc]
                        [--csv-only | --json-only]

Exports issues from Github to JSON and/or CSV. Output filename is
USERNAME-REPO-issues.[csv|json]

optional arguments:
  -h, --help   show this help message and exit
  --repo REPO  Github repository of the form user/repo. If not provided, will
               ask interactively.
  --user USER  Github username. If neither --user nor --public are specified,
               will ask interactively.
  --public     If specified, this is a public repository and no authentication
               is needed. If neither --user nor --public are specified, will
               ask interactively.
  --desc       If specified, sort issues by created datetime in descending
               order (most recent first). Default is ascending order.
  --csv-only   If specified, only write the issues in CSV form.
  --json-only  If specified, only write the issues in JSON form.
"""
import csv | |
import json | |
import getpass | |
import sys | |
import argparse | |
import requests # external dependency | |
def write_issues_csv(csvfile, issues):
    """Write a list of issues to a CSV file.

    A header row is written first, followed by one row per issue holding its
    number, title, body, creation timestamp and update timestamp.

    Args:
        csvfile: Path of the CSV file to create (overwritten if it exists).
        issues: Sequence of issue dicts. Each must provide the keys
            'number', 'title', 'body', 'created_at' and 'updated_at'.
    """
    py2 = sys.version_info[0] < 3

    def as_cell(text):
        # A title/body can be None (JSON null, e.g. an issue filed without a
        # description); emit an empty cell instead of crashing on None.encode().
        if text is None:
            return ''
        # The Python 2 csv module works on byte strings, so text has to be
        # UTF-8 encoded there; on Python 3 the file object does the encoding.
        return text.encode('utf-8') if py2 else text

    if py2:
        out = open(csvfile, 'wb')
    else:
        # newline='' lets the csv module control line endings itself.
        out = open(csvfile, 'w', newline='', encoding='utf-8')

    # The context manager guarantees the file is flushed and closed even if
    # a malformed issue raises half-way through.
    with out:
        csvout = csv.writer(out)
        csvout.writerow(('id', 'Title', 'Body', 'Created At', 'Updated At'))
        for issue in issues:
            csvout.writerow([
                issue['number'],
                as_cell(issue['title']),
                as_cell(issue['body']),
                issue['created_at'],
                issue['updated_at'],
            ])
    print("Wrote %d issues to %s" % (len(issues), csvfile))
def write_issues_json(jsonfile, issues):
    """Write a list of issues to a JSON file, indented by two spaces.

    Args:
        jsonfile: Path of the JSON file to create (overwritten if it exists).
        issues: JSON-serializable sequence of issues.
    """
    # Text mode, not 'wb': json.dump() emits str, which a binary-mode file
    # rejects on Python 3. json.dump() escapes non-ASCII characters by
    # default, so the output is plain ASCII whatever the platform encoding.
    with open(jsonfile, 'w') as f:
        json.dump(issues, f, indent=2)
    print("Wrote %d issues to %s" % (len(issues), jsonfile))
def get_issues(issues_list, url, page, auth=None, direction='asc', timeout=30):
    """Fetch one page of issues and append them to ``issues_list``.

    Issues in every state are requested, sorted by creation time.

    Args:
        issues_list: List that the issues found on this page are appended to.
        url: Issues endpoint of the repository.
        page: Page number to request.
        auth: Optional credentials passed straight to ``requests.get``;
            None performs an unauthenticated request.
        direction: Sort direction sent to the API, 'asc' or 'desc'.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so that a stalled connection cannot hang the export forever.

    Returns:
        The number of issues found on the requested page (0 for an empty
        page).

    Raises:
        Exception: If the response status is not 200, the payload is not a
            list, or an entry has no 'number' key.
    """
    print("getting issues for page %d..." % page)
    # Let requests build and escape the query string instead of hand-formatting it.
    query = {
        'sort': 'created',
        'direction': direction,
        'state': 'all',
        'page': page,
    }
    r = requests.get(url, params=query, auth=auth, timeout=timeout)
    if r.status_code != 200:
        raise Exception("GitHub API request to %s failed with HTTP status %d"
                        % (r.url, r.status_code))

    page_issues = r.json()
    # Anything other than a JSON array is not a page of issues; fail loudly
    # rather than iterating over the keys of an unexpected object.
    if not isinstance(page_issues, list):
        raise Exception("Bad response (expected a list of issues): %r"
                        % (page_issues,))
    for issue in page_issues:
        if 'number' not in issue:
            raise Exception("Bad issue: %s" % (issue,))

    issues_list.extend(page_issues)
    cnt = len(page_issues)
    print("... found %d issues" % cnt)
    return cnt
def gather_parameters(args):
    """Collect the repository, API URL and credentials for the export.

    Values that were not specified on the command line are asked for
    interactively.

    Args:
        args: Parsed command line arguments providing the attributes
            ``repo``, ``user`` and ``public``.

    Returns:
        A tuple ``(repo, url, auth)``: the 'user/repo' string, the issues
        endpoint URL for that repository, and either a
        ``(username, password)`` tuple or None when no username is used.
    """
    # Python 2 calls the line-reading prompt raw_input(); Python 3 renamed it
    # to input(). (Python 2's own input() evaluates what is typed, so it must
    # not be used there.)
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    repo = args.repo
    if repo is None:
        # Keep asking until the answer at least looks like 'user/repo'.
        while True:
            repo = prompt("Please enter your repository in the form 'user/repo': ")
            if '/' in repo:
                break

    if args.public:
        user = ''
    elif args.user is not None:
        user = args.user
    else:
        user = prompt("Enter your github username (or RETURN if a public repository): ")

    if user == '':
        auth = None
    else:
        # NOTE(review): GitHub's API is understood to no longer accept account
        # passwords for basic auth; a personal access token presumably has to
        # be entered at this prompt instead -- confirm against GitHub's docs.
        password = getpass.getpass("Password for github user %s: " % user)
        auth = (user, password)

    url = 'https://api.github.com/repos/%s/issues' % repo
    return (repo, url, auth)
def main(argv=None):
    """Command line entry point: download a repository's issues and save them.

    Args:
        argv: List of command line arguments (without the program name).
            Defaults to ``sys.argv[1:]``, looked up at call time.

    Returns:
        0 on success, suitable for passing to ``sys.exit``.
    """
    # Resolve the default here rather than in the signature: a default of
    # sys.argv[1:] would be frozen when the function is defined.
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Exports issues from Github to JSON and/or CSV. Output filename is USERNAME-REPO-issues.[csv|json]")
    parser.add_argument("--repo", dest='repo', action='store',
                        default=None,
                        help="Github repository of the form user/repo. If not provided, will ask interactively.")
    parser.add_argument("--user", dest='user', action='store', default=None,
                        help="Github username. If neither --user nor --public are specified, will ask interactively.")
    parser.add_argument("--public", dest='public', action='store_true', default=None,
                        help="If specified, this is a public repository and no authentication is needed. If neither --user nor --public are specified, will ask interactively.")
    parser.add_argument("--desc", dest="desc", action='store_true', default=False,
                        help="If specified, sort issues by created datetime in descending order (most recent first). Default is ascending order.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--csv-only", dest="csv_only", action='store_true', default=False,
                       help="If specified, only write the issues in CSV form.")
    group.add_argument("--json-only", dest="json_only", action='store_true', default=False,
                       help="If specified, only write the issues in JSON form.")
    args = parser.parse_args(args=argv)

    (repo, url, auth) = gather_parameters(args)

    # Output files are named after the repository, with '/' made filename-safe.
    basename = repo.replace('/', '-')
    csvfile = None if args.json_only else '%s-issues.csv' % basename
    jsonfile = None if args.csv_only else '%s-issues.json' % basename
    direction = 'desc' if args.desc else 'asc'

    issues_list = []
    # The github issues API provides pagination info on the 'link' HTTP header.
    # However, the format is unnecessarily hard to parse. Instead, we just
    # request page 1, 2, 3, ... until a page comes back empty.
    page = 1
    while get_issues(issues_list, url, page, auth=auth, direction=direction) > 0:
        page += 1
    print("Found %d issues total" % len(issues_list))

    if csvfile:
        write_issues_csv(csvfile, issues_list)
    if jsonfile:
        write_issues_json(jsonfile, issues_list)
    return 0
if __name__ == "__main__":
    # Run the exporter only when executed as a script, and hand main()'s
    # return value to the shell as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment