Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 19 You must be signed in to star a gist
  • Fork 11 You must be signed in to fork a gist
  • Save marcelkornblum/21be3c13b2271d1d5a89bf08cbfa500e to your computer and use it in GitHub Desktop.
Save marcelkornblum/21be3c13b2271d1d5a89bf08cbfa500e to your computer and use it in GitHub Desktop.
Export Issues from Github repo to CSV (API v3)
"""
This is strongly based on https://gist.github.com/unbracketed/3380407;
thanks to @unbracketed and the various commenters on the page.
I've mainly cleaned up the code into basic methods, and included the
various suggestions in the comments. Hope this is useful to someone.
Make sure you have `requests` installed via pip (`csv` ships with Python), then run it:
`python export_gh_issues_to_csv.py`
---
Exports Issues from a specified repository to a CSV file
Uses basic authentication (Github username + password) or token to retrieve Issues
from a repository that username has access to. Supports Github API v3.
"""
import csv
import requests
# Credentials. get_issues() sends GITHUB_TOKEN in an Authorization header when
# it is non-empty; otherwise it falls back to basic auth with
# GITHUB_USER / GITHUB_PASSWORD.
GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = '' # format is username/repo
# URL of the first page of issues; REPO is interpolated here at import time,
# so it must be filled in above. process() uses this as its default start URL.
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO
# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that filtering is powerful and there are lots of things available. Also that issues and PRs
# arrive in the same results set
# Query-string parameters that get_issues() attaches to every request.
params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' }
def write_issues(response, csvout):
    """Write every issue in one API response page to the CSV writer.

    Args:
        response: object whose ``json()`` returns a list of issue dicts with
            the keys ``number``, ``title``, ``body``, ``labels`` (a list of
            dicts carrying a ``name`` key), ``created_at`` and ``updated_at``.
        csvout: object with a ``writerow(row)`` method, e.g. ``csv.writer``.

    One row is written per issue: number, title, body, comma-separated label
    names, created timestamp, updated timestamp. The text columns (title,
    body, labels) are UTF-8 encoded before being handed to the writer.
    """
    issues = response.json()  # decode the payload once, not once per use
    print(" : Writing %s issues" % len(issues))
    for issue in issues:
        label_string = u', '.join(label['name'] for label in issue['labels'])
        # An issue with no description comes back with a null body; export
        # it as an empty cell instead of crashing on None.encode().
        body = issue['body'] or u''
        csvout.writerow([
            issue['number'],
            issue['title'].encode('utf-8'),
            body.encode('utf-8'),
            label_string.encode('utf-8'),
            issue['created_at'],
            issue['updated_at'],
        ])
def get_issues(url):
    """Fetch one page from the GitHub API and return the response.

    Authenticates with GITHUB_TOKEN when it is set, otherwise with basic
    auth (GITHUB_USER / GITHUB_PASSWORD). ``params_payload`` is sent as the
    query string of every request.

    Args:
        url: full API URL to request.

    Returns:
        The ``requests`` response object of a 200 reply.

    Raises:
        Exception: carrying the HTTP status code when the reply is not 200.
    """
    headers = {
        # The API media type is negotiated through Accept; Content-Type
        # describes a request body, which a GET does not have.
        'Accept': 'application/vnd.github.v3.raw+json',
        'User-Agent': 'GitHub issue exporter',
    }
    kwargs = {'headers': headers, 'params': params_payload}
    if GITHUB_TOKEN != '':
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)
    print("GET %s" % url)
    resp = requests.get(url, **kwargs)
    print(" : => %s" % resp.status_code)
    if resp.status_code != 200:
        raise Exception(resp.status_code)
    return resp
def next_page(response):
    """Return the URL of the next page of results, or None if there is none.

    Reads the ``link`` response header, which has the form
    ``<url>; rel="next", <url>; rel="last"``.

    Args:
        response: object with a dict-like ``headers`` attribute.

    Returns:
        The URL tagged ``rel="next"``, or None when the header is absent or
        carries no such link.
    """
    link_header = response.headers.get('link')
    if not link_header:
        return None
    # Simple split-based parsing: assumes the URLs themselves contain no
    # ',' or ';' characters.
    for link in link_header.split(','):
        parts = link.split(';')
        target = parts[0].strip()
        if not (target.startswith('<') and target.endswith('>')):
            continue
        for param in parts[1:]:
            name, _, value = param.strip().partition('=')
            rel_values = value.strip().strip('"').split()
            # Follow 'next' whether or not a 'last' link is also present;
            # the API may leave 'last' out while more pages remain.
            if name.strip().lower() == 'rel' and 'next' in rel_values:
                return target[1:-1]
    return None
def process(csvout, url=ISSUES_FOR_REPO_URL):
    """Export every page of issues, starting at ``url``, to ``csvout``.

    Args:
        csvout: CSV writer that write_issues() appends rows to.
        url: API URL of the first page to fetch.

    Pages are walked in a loop rather than by recursion, so the number of
    pages is not limited by the interpreter's recursion depth.
    """
    while True:
        resp = get_issues(url)
        write_issues(resp, csvout)
        url = next_page(resp)
        if url is None:
            break
def main():
    """Export the issues of REPO to '<user>-<repo>-issues.csv'.

    Writes a header row, then one row per issue via process().
    """
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    # csvfile is only the file *name*; the with-block owns the handle and
    # closes it even if the export fails part-way. Binary mode matches the
    # UTF-8 encoded byte strings that write_issues() emits.
    with open(csvfile, 'wb') as f:
        csvout = csv.writer(f)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)


# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
@chachra
Copy link

chachra commented May 3, 2017

Thanks! I had to make some changes to get it to work (worked backwards from issues guessing).

print "  : Writing %s issues" % len(response.json)
for issue in response.json:

Also since body could be None

csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8') if issue['body'] is not None else '', label_string.encode('utf-8'), issue['created_at'], issue['updated_at']])

Also #csvfile.close() gave trouble, so just commented it out!

@chadsten
Copy link

I also had a slight issue that was pretty easy to solve.

ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

This gave me an error, but when I removed % REPO it was fine. Looks like maybe a comment that didn't get properly deleted?

@hinap
Copy link

hinap commented Aug 12, 2017

I am getting :
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any help here would really be appreciated.

The full traceback :
Traceback (most recent call last):
File "extract_gitcsv.py", line 97, in
main()
File "extract_gitcsv.py", line 92, in main
process(csvout)
File "extract_gitcsv.py", line 82, in process
write_issues(resp, csvout)
File "extract_gitcsv.py", line 28, in write_issues
print " : Writing %s issues" % len(response.json())
File "/Library/Python/2.7/site-packages/requests-2.14.2-py2.7.egg/requests/models.py", line 885, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Python/2.7/site-packages/simplejson/init.py", line 516, in loads
return _default_decoder.decode(s)
File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 370, in decode
obj, end = self.raw_decode(s)
File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 400, in raw_decode
return self.scan_once(s, idx=_w(s, idx).end())
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

@abevoelker
Copy link

Got this error:

GET https://api.github.com/repos/foo/bar/issues
  : => 200
  : Writing 1 issues
Traceback (most recent call last):
  File "issue_copy.py", line 95, in <module>
    main()
  File "issue_copy.py", line 91, in main
    csvfile.close()
AttributeError: 'str' object has no attribute 'close'

Fixed by replacing this:

def main():
    # Original version, quoted for comparison with the fix below.
    # csvfile holds the file *name* (a str); the object returned by open()
    # is passed straight to csv.writer and never bound to a name.
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    csvout = csv.writer(open(csvfile, 'wb'))
    csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
    process(csvout)
    # This is the line that raises AttributeError: a str has no close().
    csvfile.close()

With this:

def main():
    """Write the header row and all issues to '<REPO with / as ->-issues.csv'."""
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    # The with-block closes the file once process() returns or raises.
    with open(csvfile, 'wb') as f:
        csvout = csv.writer(f)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)

In any case I'm going to use something else as this only dumps the initial issue and not any comments/discussion that follows.

@abevoelker
Copy link

I ended up using this as I wanted to dump a whole organization's repos: https://github.com/josegonzalez/python-github-backup

@Raja-Kankanala
Copy link

Raja-Kankanala commented Sep 19, 2017

Thanks !
I have added extra columns such as assignee and milestone, and changed the order of the columns to suit my requirements.
Find the changes here.

  assignees = issue['assignees']
    assigne_string = ''
    for asignee in assignees:
        assigne_string = "%s %s" % (assigne_string, asignee['login'])
    assigne_string = assigne_string[0:]
    
    milestone_string='';
    milestones = issue['milestone']
  
    if isinstance(milestones, dict):
    
         milestone_string = "%s %s" % (milestone_string, milestones['title'])


    csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'),assigne_string,milestone_string, issue['created_at'], issue['updated_at']])

@liosc
Copy link

liosc commented Sep 21, 2017

Thanks for this very useful code.

I had to change the open parameter from 'wb' to 'w' otherwise I was getting the error "TypeError: a bytes-like object is required, not 'str'".

@PauloPeres
Copy link

Hi guys, here's the code for doing it with a JSON approach.
You will have to type in the Jira project name and key, and create the project beforehand.
I'm also adding all the comments from GitHub.
Using Python 3.5.4

import requests
import json

# Credentials. get_issues() sends GITHUB_TOKEN in an Authorization header when
# it is non-empty; otherwise it falls back to basic auth with
# GITHUB_USER / GITHUB_PASSWORD.
GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = ''  # format is username/repo
# URL of the first page of issues; REPO is interpolated at import time.
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

# Name and key that main() writes into the single 'projects' entry of the
# output JSON.
JIRA_PROJECT_NAME = "Example Name APP V1"
JIRA_PROJECT_KEY = "EXP"
# Only referenced from a commented-out "issueType" line in parse_json().
DEFAULT_ISSUE_TYPE = "Bug"

# Update your filter here.  See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that filtering is powerful and there are lots of things available. Also that issues and PRs
# arrive in the same results set
# Query-string parameters that get_issues() attaches to every request.
params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' }

def parse_json(response):
    """Convert one page of GitHub issues into issue dicts for the export.

    Each issue's comments (if any) are downloaded and appended to its
    description, separated by a dashed rule and a "Github Comment from
    <login>" line.

    Args:
        response: object whose ``json()`` returns a list of GitHub issue
            dicts (keys used: ``labels``, ``comments``, ``comments_url``,
            ``body``, ``created_at``, ``updated_at``, ``title``,
            ``assignee``, ``number``).

    Returns:
        A list with one dict per issue, holding ``description``, ``labels``,
        ``created``, ``updated``, ``summary``, ``assignee`` and
        ``externalId``.

    Raises:
        Exception: propagated from get_issues() when a comment request does
            not return 200.
    """
    data = []
    for full_issue in response.json():
        labels = [label['name'] for label in full_issue['labels']]

        # An issue without a description has a null body; normalise it to ''
        # so that comment text can be appended without a TypeError. Working
        # on a local also leaves the decoded issue dict untouched.
        description = full_issue['body'] or ''

        if full_issue['comments'] > 0:
            # Comments are paginated like issues, so keep following the
            # 'next' link instead of reading only the first page.
            comments_url = full_issue['comments_url']
            while comments_url is not None:
                comment_data = get_issues(comments_url)
                for full_comment in comment_data.json():
                    description += '\n\n-------------------------------------------'
                    description += "\nGithub Comment from %s" % full_comment['user']['login']
                    description += '\n' + (full_comment['body'] or '')
                comments_url = next_page(comment_data)

        assignee = full_issue['assignee']

        issue = {
            # "priority":
            "description": description,
            # "status": "Closed",
            #  "reporter" : "alice",
            "labels": labels,
            # "watchers" : [ "bob" ],
            # "issueType": DEFAULT_ISSUE_TYPE,
            # "resolution": "Resolved",
            "created": full_issue['created_at'],
            "updated": full_issue['updated_at'],
            #  "affectedVersions" : [ "1.0" ],
            "summary": full_issue['title'],
            # Unassigned issues carry a null assignee; export them as "".
            "assignee": assignee['login'] if assignee else "",
            #   "fixedVersions" : [ "1.0", "2.0" ],
            # "components" : ["Component", "AnotherComponent"],
            "externalId": full_issue['number'],
            #"history": [
            #    {
            #        "author": "alice",
            #        "created": "2012-08-31T15:59:02.161+0100",
            #        "items": [
            #            {
            #                "fieldType": "jira",
            #                "field": "status",
            #                "from": "1",
            #                "fromString": "Open",
            #                "to": "5",
            #                "toString": "Resolved"
            #            }
            #        ]
            #    }
            #],
            #"customFieldValues": [
            #    {
            #        "fieldName": "Story Points",
            #        "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #        "value": "15"
            #    },
            #    {
            #        "fieldName": "Business Value",
            #        "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #        "value": "34"
            #    }
            #],
            #"attachments": [
            #    {
            #        "name": "battarang.jpg",
            #       "attacher": "admin",
            #        "created": "2012-08-31T17:59:02.161+0100",
            #        "uri": "http://optimus-prime/~batman/images/battarang.jpg",
            #        "description": "This is optimus prime"
            #    }
            #]
        }

        data.append(issue)
    return data

def get_issues(url):
    """Fetch one page from the GitHub API and return the response.

    Authenticates with GITHUB_TOKEN when it is set, otherwise with basic
    auth (GITHUB_USER / GITHUB_PASSWORD). ``params_payload`` is sent as the
    query string of every request.

    Args:
        url: full API URL to request.

    Returns:
        The ``requests`` response object of a 200 reply.

    Raises:
        Exception: carrying the HTTP status code when the reply is not 200.
    """
    headers = {
        # The API media type is negotiated through Accept; Content-Type
        # describes a request body, which a GET does not have.
        'Accept': 'application/vnd.github.v3.raw+json',
        'User-Agent': 'GitHub issue exporter',
    }
    kwargs = {'headers': headers, 'params': params_payload}
    if GITHUB_TOKEN != '':
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)

    resp = requests.get(url, **kwargs)
    print("  : => %s" % resp.status_code)

    if resp.status_code != 200:
        raise Exception(resp.status_code)

    return resp


def next_page(response):
    """Return the URL of the next page of results, or None if there is none.

    Reads the ``link`` response header, which has the form
    ``<url>; rel="next", <url>; rel="last"``.

    Args:
        response: object with a dict-like ``headers`` attribute.

    Returns:
        The URL tagged ``rel="next"``, or None when the header is absent or
        carries no such link.
    """
    link_header = response.headers.get('link')
    if not link_header:
        return None
    # Simple split-based parsing: assumes the URLs themselves contain no
    # ',' or ';' characters.
    for link in link_header.split(','):
        parts = link.split(';')
        target = parts[0].strip()
        if not (target.startswith('<') and target.endswith('>')):
            continue
        for param in parts[1:]:
            name, _, value = param.strip().partition('=')
            rel_values = value.strip().strip('"').split()
            # Follow 'next' whether or not a 'last' link is also present;
            # the API may leave 'last' out while more pages remain.
            if name.strip().lower() == 'rel' and 'next' in rel_values:
                return target[1:-1]
    return None

def process_json(jdata, url=ISSUES_FOR_REPO_URL):
    """Collect every page of issues, starting at ``url``, into ``jdata``.

    Args:
        jdata: export dict; parsed issues are appended in place to
            ``jdata['projects'][0]['issues']``.
        url: API URL of the first page to fetch.

    Returns:
        The same ``jdata`` object, after the last page has been added.

    Pages are walked in a loop rather than by recursion, so the number of
    pages is not limited by the interpreter's recursion depth.
    """
    while True:
        resp = get_issues(url)
        jdata['projects'][0]['issues'] += parse_json(resp)

        url = next_page(resp)
        if url is None:
            return jdata

def main():
    """Export the issues of REPO to '<user>-<repo>-issues.json'.

    Builds a single-project skeleton from JIRA_PROJECT_NAME and
    JIRA_PROJECT_KEY, fills its 'issues' list via process_json(), and dumps
    the result as JSON.
    """
    file_name = '%s-issues.json' % (REPO.replace('/', '-'))
    data = {
        'projects': [
            {
                'name': JIRA_PROJECT_NAME,
                'key': JIRA_PROJECT_KEY,
                'issues': [],
            },
        ],
    }
    data = process_json(data)

    with open(file_name, 'w') as outfile:
        json.dump(data, outfile)


# Run the export only when executed as a script, so importing this module
# does not trigger network requests or write a file.
if __name__ == '__main__':
    main()

@sampathkssaravana
Copy link

sampathkssaravana commented Aug 1, 2019

how to get projects part of this JSON objects?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment