Skip to content

Instantly share code, notes, and snippets.

@jfischer
Forked from unbracketed/export_repo_issues_to_csv.py
Last active March 2, 2016 18:21
Show Gist options
  • Save jfischer/74444b3a247a3c0b9004 to your computer and use it in GitHub Desktop.
Save jfischer/74444b3a247a3c0b9004 to your computer and use it in GitHub Desktop.
Export Issues from Github repo to CSV (API v3)
#!/usr/bin/env python
"""
Exports Issues from a specified repository to a CSV and/or JSON file
Uses basic authentication (Github username + password) to retrieve Issues
from a repository that username has access to. If a public repository,
no user/password needs to be provided. Supports Github API v3.
Originally based on a script by Brian Luft at https://gist.github.com/unbracketed/3380407
This requires installing the 'requests' package (https://pypi.python.org/pypi/requests/).
2/2016 Jeff Fischer made the following changes:
* Fixed issues with pagination
* Removed hard-coded label filtering
* Added sorting
* Added JSON output
* Added command line argument processing and interactive querying for repo/user/password.
USAGE
-----
export_github_issues.py [-h] [--repo REPO] [--user USER]
[--public] [--desc]
[--csv-only | --json-only]
Exports issues from Github to JSON and/or CSV. Output filename is
USERNAME-REPO-issues.[csv|json]
optional arguments:
-h, --help show this help message and exit
--repo REPO Github repository of the form user/repo. If not provided, will
ask interactively.
--user USER Github username. If neither --user nor --public are specified,
will ask interactively.
--public If specified, this is a public repository and no authentication
is needed. If neither --user nor --public are specified, will
ask interactively.
--desc If specified, sort issues by created datetime in descending
order (most recent first). Default is ascending order.
--csv-only If specified, only write the issues in CSV form.
--json-only If specified, only write the issues in JSON form.
"""
import csv
import json
import getpass
import sys
import argparse
import requests # external dependency
def write_issues_csv(csvfile, issues):
    """Write a list of issues to a CSV file.

    The file starts with a header row, followed by one row per issue holding
    its number, title, body, creation timestamp and last-update timestamp.

    Args:
        csvfile: Path of the CSV file to create (truncated if it exists).
        issues: List of issue dicts. Each must provide the keys 'number',
            'title', 'body', 'created_at' and 'updated_at'. A title or body
            of None is written as an empty cell.
    """
    py2 = sys.version_info[0] < 3
    if py2:
        # Python 2's csv module writes byte strings: open in binary mode and
        # encode the text fields explicitly.
        out = open(csvfile, 'wb')
    else:
        # Python 3's csv module writes text; newline='' leaves line endings
        # under the csv writer's control.
        out = open(csvfile, 'w', newline='', encoding='utf-8')

    def as_cell(text):
        # Missing text (None) must not crash the export.
        text = text or u''
        return text.encode('utf-8') if py2 else text

    # The context manager guarantees the file is flushed and closed.
    with out:
        csvout = csv.writer(out)
        csvout.writerow(('id', 'Title', 'Body', 'Created At', 'Updated At'))
        for issue in issues:
            csvout.writerow([
                issue['number'],
                as_cell(issue['title']),
                as_cell(issue['body']),
                issue['created_at'],
                issue['updated_at'],
            ])
    print("Wrote %d issues to %s" % (len(issues), csvfile))
def write_issues_json(jsonfile, issues):
    """Write a list of issues to a file as indented JSON.

    Args:
        jsonfile: Path of the JSON file to create (truncated if it exists).
        issues: JSON-serializable list of issues.
    """
    # Text mode: json.dump emits str, which a binary-mode file rejects on
    # Python 3. The output is ASCII-only because ensure_ascii defaults to True.
    with open(jsonfile, 'w') as f:
        json.dump(issues, f, indent=2)
    print("Wrote %d issues to %s" % (len(issues), jsonfile))
def get_issues(issues_list, url, page, auth=None, direction='asc', per_page=100):
    """Fetch one page of issues and append them to ``issues_list``.

    Issues are requested sorted by creation time, in every state.

    Args:
        issues_list: List that the fetched issue dicts are appended to.
        url: Issues endpoint URL of the repository.
        page: 1-based page number to request.
        auth: Value passed through to ``requests.get`` as ``auth``, or None
            for an unauthenticated request.
        direction: Sort direction, 'asc' or 'desc'.
        per_page: Number of issues requested per page. A larger page size
            means fewer requests for the same export.

    Returns:
        The number of issues found on this page (0 once past the last page).

    Raises:
        Exception: If the response status is not 200, or if an item in the
            response has no 'number' key.
    """
    print("getting issues for page %d..." % page)
    # Let requests build and escape the query string.
    query = {
        'sort': 'created',
        'direction': direction,
        'state': 'all',
        'page': page,
        'per_page': per_page,
    }
    r = requests.get(url, params=query, auth=auth)
    if r.status_code != 200:
        # Include the URL and the start of the body so failures such as bad
        # credentials or rate limiting can be diagnosed.
        raise Exception("Request to %s failed with HTTP status %d: %s"
                        % (r.url, r.status_code, r.text[:200]))
    page_issues = r.json()
    for issue in page_issues:
        if 'number' not in issue:
            raise Exception("Bad issue: %s" % issue)
    issues_list.extend(page_issues)
    cnt = len(page_issues)
    print("... found %d issues" % cnt)
    return cnt
def gather_parameters(args):
    """Resolve the repository, API URL and credentials for the export.

    Any value that was not specified on the command line is requested
    interactively.

    Args:
        args: Parsed arguments exposing the attributes ``repo``, ``user``
            and ``public``.

    Returns:
        A ``(repo, url, auth)`` tuple: the 'user/repo' string, the issues
        endpoint URL built from it, and either a ``(username, password)``
        tuple or None when no username is in use.
    """
    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    if args.repo is None:
        # Keep asking until the answer at least contains the '/' separator.
        while True:
            repo = prompt("Please enter your repository in the form 'user/repo': ")
            if '/' in repo:
                break
    else:
        repo = args.repo

    if args.public:
        github_user = ''
    elif args.user is not None:
        github_user = args.user
    else:
        github_user = prompt("Enter your github username (or RETURN if a public repository): ")

    if github_user == '':
        auth = None
    else:
        # NOTE(review): GitHub's API may require a personal access token in
        # place of the account password -- confirm against current GitHub docs.
        github_password = getpass.getpass("Password for github user %s: " % github_user)
        auth = (github_user, github_password)

    url = 'https://api.github.com/repos/%s/issues' % repo
    return (repo, url, auth)
def main(argv=None):
    """Parse the command line, download all issues and write the output files.

    Args:
        argv: List of command line arguments without the program name. When
            None, argparse reads ``sys.argv[1:]`` at call time.

    Returns:
        0 on success.
    """
    parser = argparse.ArgumentParser(description="Exports issues from Github to JSON and/or CSV. Output filename is USERNAME-REPO-issues.[csv|json]")
    parser.add_argument("--repo", dest='repo', action='store',
                        default=None,
                        help="Github repository of the form user/repo. If not provided, will ask interactively.")
    parser.add_argument("--user", dest='user', action='store', default=None,
                        help="Github username. If neither --user nor --public are specified, will ask interactively.")
    parser.add_argument("--public", dest='public', action='store_true', default=None,
                        help="If specified, this is a public repository and no authentication is needed. If neither --user nor --public are specified, will ask interactively.")
    parser.add_argument("--desc", dest="desc", action='store_true', default=False,
                        help="If specified, sort issues by created datetime in descending order (most recent first). Default is ascending order.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--csv-only", dest="csv_only", action='store_true', default=False,
                       help="If specified, only write the issues in CSV form.")
    group.add_argument("--json-only", dest="json_only", action='store_true', default=False,
                       help="If specified, only write the issues in JSON form.")
    args = parser.parse_args(args=argv)

    repo, url, auth = gather_parameters(args)

    # Output files are named after the repository, with '/' replaced by '-'.
    basename = repo.replace('/', '-')
    csvfile = None if args.json_only else '%s-issues.csv' % basename
    jsonfile = None if args.csv_only else '%s-issues.json' % basename

    direction = 'desc' if args.desc else 'asc'
    issues_list = []
    # The github issues API provides pagination info on the 'link' HTTP header.
    # Instead of parsing it, request successive pages until one comes back
    # empty.
    page = 1
    while get_issues(issues_list, url, page, auth=auth, direction=direction) > 0:
        page += 1
    print("Found %d issues total" % len(issues_list))

    if csvfile:
        write_issues_csv(csvfile, issues_list)
    if jsonfile:
        write_issues_json(jsonfile, issues_list)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment