Last active
January 23, 2018 20:27
-
-
Save mmoelli/91e8fafbfbabf7af8b00 to your computer and use it in GitHub Desktop.
[python] Fetch Issues from GitHub API (v3) with Auth token and put them into a CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Exports issues from a list of specified repositories to CSV files (one file per repository).
Credits go to https://gist.github.com/unbracketed/3380407#file-export_repo_issues_to_csv-py for the initial work, which was adjusted slightly here.
FYI: you need to install 'requests' first, ideally via pip: "$ sudo pip install requests"
""" | |
import csv

import requests
# Repositories whose issues are exported; each entry is "username/repo".
REPO = [
    'org/repo1',
    'org/repo2',
]

# Your app token; it is interpolated into the Authorization header below.
PERSONAL_TOKEN = ''

# Headers sent with every request to the GitHub API.
headers = {'Authorization': 'token %s' % PERSONAL_TOKEN}

# Query parameters deciding which issues are returned. Change these based on
# which issues you are actually searching for, see also:
# https://developer.github.com/v3/issues/#parameters
params_payload = {
    'state': 'closed',
    'since': '2015-01-01T00:00:00Z',
    'sort': 'updated',
}
def _ascii_only(text):
    """Return *text* with every non-ASCII character removed, as text."""
    # Round-trip through bytes: this keeps the "drop non-ASCII characters"
    # behaviour but hands the csv writer a text value again, so Python 3 does
    # not emit literal "b'...'" cells.
    return text.encode('ascii', 'ignore').decode('ascii')


def write_issues(response, writer=None):
    """Write the issues contained in one API response as CSV rows.

    Entries carrying a 'pull_request' key are skipped, so only real issues
    are exported. Each row holds: number, state, title, body (cut to 10000
    characters), comma-joined label names, created_at and closed_at. State,
    title and body are stripped of non-ASCII characters.

    Args:
        response: Object exposing ``status_code`` and ``json()``, as returned
            by ``requests.get``.
        writer: Object with a ``writerow`` method (e.g. ``csv.writer``).
            Defaults to the module-level ``csvout``.

    Raises:
        Exception: With ``(status_code, decoded JSON body)`` as arguments
            when the response status is not 200.
    """
    # Use the response that was passed in rather than a module-level global.
    if response.status_code != 200:
        raise Exception(response.status_code, response.json())

    if writer is None:
        writer = csvout

    for issue in response.json():
        # only proceed if the issue is not a pull request
        if 'pull_request' in issue:
            continue

        label_names = ','.join(label['name'] for label in issue['labels'])
        # body may be None/empty; truncate overly long body texts
        body_text = (issue['body'] or '')[:10000]

        # Decide on your own which information you want to use. Be sure to
        # adjust the CSV header row as well.
        writer.writerow([
            issue['number'],
            _ascii_only(issue['state']),
            _ascii_only(issue['title']),
            _ascii_only(body_text),
            label_names,
            issue['created_at'],
            issue['closed_at'],
        ])
# Export every configured repository into its own "<owner>-<repo>-issues.csv".
for repo in REPO:
    ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % repo
    r = requests.get(ISSUES_FOR_REPO_URL, params=params_payload, headers=headers)
    print("Starting with Repository: " + repo)

    csvfile = '%s-issues.csv' % (repo.replace('/', '-'))
    # The context manager makes sure the file is flushed and closed, even when
    # a request or write_issues() raises part-way through a repository.
    with open(csvfile, 'w') as csv_handle:
        # Keep the module-level names `r` and `csvout`: write_issues() relies
        # on module globals when writing rows.
        csvout = csv.writer(csv_handle, delimiter=',', quotechar='"')
        # This is the header to adjust for a proper csv
        csvout.writerow(['id', 'State', 'Title', 'Body', 'Labels', 'Created At', 'Closed At'])
        write_issues(r)

        # Check for more pages. `r.links` is the parsed 'Link' header; it has
        # no "next" entry on the last page (or when there is no Link header),
        # which ends the loop without assuming a "last" relation exists.
        while 'next' in r.links:
            newlink = r.links['next']['url']
            # The "next" URL already carries the query parameters.
            r = requests.get(newlink, headers=headers)
            print("Now processing page: " + newlink)
            write_issues(r)

    print("Done with Repository: " + repo)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Updated to work with a full list of repos and also added encoding information (had a problem with the Trademark symbol)