Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Export Issues from Github repo to CSV (API v3)
"""
This is strongly based on https://gist.github.com/unbracketed/3380407;
thanks to @unbracketed and the various commenters on the page.
I've mainly cleaned up the code into basic methods, and included the
various suggestions in the comments. Hope this is useful to someone.
Make sure you have `requests` and `csv` installed via pip then run it:
`python export_gh_issues_to_csv.py`
---
Exports Issues from a specified repository to a CSV file
Uses basic authentication (Github username + password) or token to retrieve Issues
from a repository that username has access to. Supports Github API v3.
"""
import csv
import requests
# --- Configuration --------------------------------------------------------
# Fill in either GITHUB_TOKEN (preferred) or GITHUB_USER/GITHUB_PASSWORD;
# get_issues() uses the token whenever it is non-empty.
GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = '' # format is username/repo
# Issues endpoint for the configured repository (GitHub API v3).
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO
# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that filtering is powerful and there are lots of things available. Also that issues and PRs
# arrive in the same results set
params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' }
def write_issues(response, csvout):
    """Write one page of issues from a GitHub API response to a csv writer.

    :param response: a requests Response whose .json() is a list of issue dicts
    :param csvout: a csv.writer-like object with a writerow() method
    """
    issues = response.json()
    print(" : Writing %s issues" % len(issues))
    for issue in issues:
        label_string = ", ".join(label['name'] for label in issue['labels'])
        # The API returns null for an empty issue body; .encode() on None
        # would crash, so normalise it to the empty string first.
        body = issue['body'] if issue['body'] is not None else ''
        csvout.writerow([
            issue['number'],
            issue['title'].encode('utf-8'),
            body.encode('utf-8'),
            label_string.encode('utf-8'),
            issue['created_at'],
            issue['updated_at'],
        ])
def get_issues(url):
    """GET *url* from the GitHub API and return the requests Response.

    Authenticates with GITHUB_TOKEN when it is set, otherwise falls back
    to basic auth with GITHUB_USER/GITHUB_PASSWORD.

    :raises Exception: with the HTTP status code on any non-200 response.
    """
    kwargs = {
        'headers': {
            'Content-Type': 'application/vnd.github.v3.raw+json',
            'User-Agent': 'GitHub issue exporter'
        },
        'params': params_payload
    }
    if GITHUB_TOKEN != '':
        # Token auth takes precedence over username/password.
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)
    print("GET %s" % url)
    resp = requests.get(url, **kwargs)
    print(" : => %s" % resp.status_code)
    if resp.status_code != 200:
        raise Exception(resp.status_code)
    return resp
def next_page(response):
    """Return the URL of the next page of results, or None on the last page.

    Parses the RFC 5988 'Link' response header that GitHub uses for
    pagination, e.g. '<https://...?page=2>; rel="next", <...>; rel="last"'.
    """
    link_header = response.headers.get('link')
    if link_header:
        pages = {}
        for link in link_header.split(','):
            url_part, rel_part = link.split(';')
            # '<https://...>' -> 'https://...'
            url = url_part[url_part.index('<') + 1:-1]
            # ' rel="next"' -> 'next'
            rel = rel_part[6:-1]
            pages[rel] = url
        # GitHub drops the 'last'/'next' rels once we are on the final page.
        if 'last' in pages and 'next' in pages:
            return pages['next']
    return None
def process(csvout, url=ISSUES_FOR_REPO_URL):
    """Fetch every page of issues starting at *url* and write them to csvout.

    Iterates rather than recursing so very large repositories cannot hit
    Python's recursion limit.
    """
    while url is not None:
        resp = get_issues(url)
        write_issues(resp, csvout)
        url = next_page(resp)
def main():
    """Export all open issues for REPO to '<user>-<repo>-issues.csv'."""
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    # The original called csvfile.close() on the *filename* string
    # (AttributeError) and leaked the real handle; 'with' closes it safely.
    # NOTE: 'wb' is the correct csv mode on Python 2; on Python 3 use
    # open(csvfile, 'w', newline='') instead.
    with open(csvfile, 'wb') as f:
        csvout = csv.writer(f)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)


if __name__ == '__main__':
    main()
@chachra

This comment has been minimized.

Copy link

commented May 3, 2017

Thanks! I had to make some changes to get it to work (worked backwards from issues guessing).

print "  : Writing %s issues" % len(response.json)
for issue in response.json:

Also since body could be None

csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8') if issue['body'] is not None else '', label_string.encode('utf-8'), issue['created_at'], issue['updated_at']])

Also #csvfile.close() gave trouble, so just commented it out!

@chadsten

This comment has been minimized.

Copy link

commented Jul 14, 2017

I also had a slight issue that was pretty easy to solve.

ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

This gave me an error, but when I removed % REPO it was fine. Looks like maybe a comment that didn't get properly deleted?

@hinap

This comment has been minimized.

Copy link

commented Aug 12, 2017

I am getting :
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any help here would really be appreciated.

The full traceback :
Traceback (most recent call last):
File "extract_gitcsv.py", line 97, in
main()
File "extract_gitcsv.py", line 92, in main
process(csvout)
File "extract_gitcsv.py", line 82, in process
write_issues(resp, csvout)
File "extract_gitcsv.py", line 28, in write_issues
print " : Writing %s issues" % len(response.json())
File "/Library/Python/2.7/site-packages/requests-2.14.2-py2.7.egg/requests/models.py", line 885, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Python/2.7/site-packages/simplejson/init.py", line 516, in loads
return _default_decoder.decode(s)
File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

@abevoelker

This comment has been minimized.

Copy link

commented Aug 29, 2017

Got this error:

GET https://api.github.com/repos/foo/bar/issues
  : => 200
  : Writing 1 issues
Traceback (most recent call last):
  File "issue_copy.py", line 95, in <module>
    main()
  File "issue_copy.py", line 91, in main
    csvfile.close()
AttributeError: 'str' object has no attribute 'close'

Fixed by replacing this:

def main():
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    csvout = csv.writer(open(csvfile, 'wb'))
    csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
    process(csvout)
    csvfile.close()

With this:

def main():
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    with open(csvfile, 'wb') as f:
        csvout = csv.writer(f)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)

In any case I'm going to use something else as this only dumps the initial issue and not any comments/discussion that follows.

@abevoelker

This comment has been minimized.

Copy link

commented Sep 2, 2017

I ended up using this as I wanted to dump a whole organization's repos: https://github.com/josegonzalez/python-github-backup

@Raja-Kankanala

This comment has been minimized.

Copy link

commented Sep 19, 2017

Thanks !
I have added extra columns (assignee and milestone) and changed the order of the columns according to my requirements.
Find changes here .

  assignees = issue['assignees']
    assigne_string = ''
    for asignee in assignees:
        assigne_string = "%s %s" % (assigne_string, asignee['login'])
    assigne_string = assigne_string[0:]
    
    milestone_string='';
    milestones = issue['milestone']
  
    if isinstance(milestones, dict):
    
         milestone_string = "%s %s" % (milestone_string, milestones['title'])


    csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'),assigne_string,milestone_string, issue['created_at'], issue['updated_at']])
@liosc

This comment has been minimized.

Copy link

commented Sep 21, 2017

Thanks for this very useful code.

I had to change the open parameter from 'wb' to 'w' otherwise I was getting the error "TypeError: a bytes-like object is required, not 'str'".

@PauloPeres

This comment has been minimized.

Copy link

commented May 1, 2018

Hi guys, here's the code for doing it, with a JSON approach.
You will have to type the Jira Project Name and Key, and create it before hand.
I'm also adding all the comments from GitHub .
Using Python 3.5.4

import requests
import json

GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = ''  # format is username/repo
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

JIRA_PROJECT_NAME = "Example Name APP V1"
JIRA_PROJECT_KEY = "EXP"
DEFAULT_ISSUE_TYPE = "Bug"

# Update your filter here.  See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that filtering is powerful and there are lots of things available. Also that issues and PRs
# arrive in the same results set
params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' }

def parse_json(response):
    """Convert a GitHub issues API response into Jira-importable issue dicts.

    For issues that have comments, fetches the comment thread via
    get_issues() and appends each comment to the issue description.

    :param response: a requests Response whose .json() is a list of issues
    :return: list of dicts using Jira JSON-import field names
    """
    data = []
    for full_issue in response.json():
        labels = [label['name'] for label in full_issue['labels']]

        # GitHub returns null for an empty body; normalise to '' so the
        # comment concatenation below (and the JSON output) never breaks.
        body = full_issue['body'] or ''

        if full_issue['comments'] > 0:
            comment_data = get_issues(full_issue['comments_url'])
            for full_comment in comment_data.json():
                body += '\n\n-------------------------------------------'
                body += "\nGithub Comment from %s" % full_comment['user']['login']
                body += '\n' + full_comment['body']

        issue = {
            # Additional Jira import fields (priority, status, reporter,
            # issueType, history, customFieldValues, attachments, ...) can
            # be added here as needed.
            "description": body,
            "labels": labels,
            "created": full_issue['created_at'],
            "updated": full_issue['updated_at'],
            "summary": full_issue['title'],
            # 'assignee' is null for unassigned issues; map that to "".
            "assignee": full_issue['assignee'] and full_issue['assignee']['login'] or "",
            "externalId": full_issue['number'],
        }

        data.append(issue)
    return data

def get_issues(url):
    """Fetch *url* from the GitHub API and return the Response.

    Uses token auth when GITHUB_TOKEN is set, basic auth otherwise;
    raises Exception carrying the status code on any non-200 reply.
    """
    headers = {
        'Content-Type': 'application/vnd.github.v3.raw+json',
        'User-Agent': 'GitHub issue exporter'
    }
    request_args = {'headers': headers, 'params': params_payload}

    if GITHUB_TOKEN != '':
        headers['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        request_args['auth'] = (GITHUB_USER, GITHUB_PASSWORD)

    resp = requests.get(url, **request_args)
    print("  : => %s" % resp.status_code)

    if resp.status_code != 200:
        raise Exception(resp.status_code)

    return resp


def next_page(response):
    """Return the next-page URL from the 'Link' header, or None at the end."""
    link_header = response.headers.get('link')
    if link_header is None:
        return None

    # Each entry looks like '<https://...>; rel="next"'.
    rels = {}
    for entry in link_header.split(','):
        url_part, rel_part = entry.split(';')
        target = url_part[url_part.index('<') + 1:-1]
        rels[rel_part[6:-1]] = target

    if 'last' in rels and 'next' in rels:
        return rels['next']
    return None

def process_json(jdata, url=ISSUES_FOR_REPO_URL):
    """Accumulate every page of issues into *jdata* and return it.

    Follows pagination links starting at *url*; issues are appended to
    jdata['projects'][0]['issues'].
    """
    current = url
    while current is not None:
        resp = get_issues(current)
        jdata['projects'][0]['issues'] += parse_json(resp)
        current = next_page(resp)
    return jdata

def main():
    """Dump all open issues for REPO into a Jira-importable JSON file."""
    file_name = '%s-issues.json' % (REPO.replace('/', '-'))
    data = {
        'projects': [
            {
                'name': JIRA_PROJECT_NAME,
                'key': JIRA_PROJECT_KEY,
                'issues': []
            }
        ]
    }
    data = process_json(data)

    with open(file_name, 'w') as outfile:
        json.dump(data, outfile)


# Guarded so importing this module does not immediately hit the network.
if __name__ == '__main__':
    main()
@sampathkssaravana

This comment has been minimized.

Copy link

commented Aug 1, 2019

how to get projects part of this JSON objects?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.