Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save andydempster/87780652b4ec37794dc6992935556062 to your computer and use it in GitHub Desktop.
Save andydempster/87780652b4ec37794dc6992935556062 to your computer and use it in GitHub Desktop.
Export issues from a GitHub repo to CSV (API v3)
"""
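Exports issues from a list of repositories to individual CSV files.
Uses basic authentication (GitHub username + password) to retrieve issues
from a repository that username has access to. Supports GitHub API v3.
Note: the GitHub API no longer accepts account passwords; enter a personal
access token when prompted for the password.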
Forked from: unbracketed/export_repo_issues_to_csv.py
"""
import argparse
import csv
from getpass import getpass
import requests
# encoding=utf8
import sys
# Python 2 only: make UTF-8 the implicit str/unicode codec so non-ASCII issue
# titles can be written out. Python 3 has neither a builtin reload() nor
# sys.setdefaultencoding(), so running these lines there raises NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 (builtin on Python 2 only)
    sys.setdefaultencoding('utf8')

# Credentials passed as ``auth=`` on every request; replaced with a
# (username, password) tuple once the user has been prompted.
auth = None

# Value of the ``state`` query parameter: 'open' unless --all selects 'all'.
state = 'open'
def write_issues(r, csvout):
    """Write one CSV row per issue contained in an API response.

    Args:
        r: Response for an issues request. Must expose ``status_code`` and a
            ``json()`` method returning a list of issue dicts.
        csvout: Object with a ``writerow`` method (e.g. a ``csv.writer``).

    Raises:
        Exception: If the response status code is not 200. The message
            includes the status code.
    """
    if r.status_code != 200:
        raise Exception(
            'GitHub API request failed with HTTP status {}'.format(
                r.status_code))

    for issue in r.json():
        # Entries carrying a 'pull_request' key are not exported.
        if 'pull_request' in issue:
            continue
        labels = ', '.join(label['name'] for label in issue['labels'])
        # 'created_at' is split on 'T' to keep only the date part.
        date = issue['created_at'].split('T')[0]
        # Change the following line to write out additional fields
        csvout.writerow([labels, issue['number'], issue['title'],
                         issue['state'], date, issue['html_url']])
def _open_csv(path):
    """Open *path* for writing CSV rows on the running Python version."""
    if sys.version_info[0] >= 3:
        # The csv module expects files opened with newline='' (otherwise
        # Windows gets blank lines between rows); UTF-8 keeps non-ASCII
        # issue titles from failing on platforms with a narrower default.
        return open(path, 'w', newline='', encoding='utf-8')
    # Python 2's csv module writes bytes and wants a binary-mode file.
    return open(path, 'wb')


def get_issues(name):
    """Request a repository's issues from the GitHub API and write them to CSV.

    The output file is named ``<name>-issues.csv`` with every '/' in *name*
    replaced by '-'. The module-level ``state`` and ``auth`` values are used
    for the query string and the request credentials.

    Args:
        name: Repository name formatted as 'username/repo'.

    Raises:
        Exception: Propagated from ``write_issues`` when a response status
            is not 200.
    """
    url = 'https://api.github.com/repos/{}/issues?state={}'.format(name, state)
    csvfilename = '{}-issues.csv'.format(name.replace('/', '-'))
    r = requests.get(url, auth=auth)
    with _open_csv(csvfilename) as csvfile:
        csvout = csv.writer(csvfile)
        csvout.writerow(['Labels', 'Number', 'Title', 'State', 'Date', 'URL'])
        write_issues(r, csvout)
        # Multiple requests are required if the response is paged. Follow the
        # rel="next" link (parsed by requests into ``r.links``) until a page
        # arrives without one, rather than requiring a rel="last" link.
        next_url = r.links.get('next', {}).get('url')
        while next_url:
            r = requests.get(next_url, auth=auth)
            write_issues(r, csvout)
            next_url = r.links.get('next', {}).get('url')
def main():
    """Parse the command line, prompt for credentials and export each repo.

    Sets the module-level ``state`` and ``auth`` values that ``get_issues``
    reads, then calls ``get_issues`` once per repository argument.
    """
    global auth, state

    parser = argparse.ArgumentParser(description="Write GitHub repository issues "
                                                 "to CSV file.")
    parser.add_argument('repositories', nargs='+', help="Repository names, "
                        "formatted as 'username/repo'")
    parser.add_argument('--all', action='store_true', help="Returns both open "
                        "and closed issues.")
    args = parser.parse_args()

    if args.all:
        state = 'all'

    # On Python 2 the builtin input() evaluates the typed text as an
    # expression; raw_input() is the plain line reader there. Python 3 has
    # no raw_input, so fall back to input().
    try:
        read_line = raw_input  # noqa: F821 (Python 2 only)
    except NameError:
        read_line = input

    username = read_line("Username for 'https://github.com': ")
    password = getpass("Password for 'https://{}@github.com': ".format(username))
    auth = (username, password)

    for repository in args.repositories:
        get_issues(repository)


# Only run when executed as a script, so importing the module does not
# parse arguments or prompt for credentials.
if __name__ == '__main__':
    main()
@manu4387
Copy link

Traceback (most recent call last):
File "export repository.py", line 72, in
get_issues(repository)
File "export repository.py", line 38, in get_issues
write_issues(r, csvout)
File "export repository.py", line 19, in write_issues
raise Exception(r.status_code)
Exception: 401

Getting this error

@andydempster
Copy link
Author

@manu4387 - what are the steps to reproduce? On OSX you may need to try different versions of python - try running the command using python3 instead of python

@manu4387
Copy link

manu4387 commented Oct 24, 2019 via email

@froberts
Copy link

froberts commented Dec 4, 2019

I am getting this error with the above code:
Traceback (most recent call last):
File "C:\Data\GISData\PythonCode\githubExtract\test3.py", line 14, in
reload(sys)
NameError: name 'reload' is not defined

@andydempster
Copy link
Author

andydempster commented Dec 4, 2019

@froberts

Scripts python github_issues_to_csv.py
usage: github_issues_to_csv.py [-h] [--all] repositories [repositories ...]
github_issues_to_csv.py: error: too few arguments
➜  Scripts python3 github_issues_to_csv.py
Traceback (most recent call last):
  File "github_issues_to_csv.py", line 14, in <module>
    reload(sys)
NameError: name 'reload' is not defined
➜  Scripts

@andydempster
Copy link
Author

@froberts

Looking at the 2to3 automated updater
reload
Converts reload() to importlib.reload().

So if you're running v3 try that - otherwise run in Python 2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment