Last active
December 6, 2016 00:46
-
-
Save jberkus/6bc54e11a81d9607ca58754db90d2ec7 to your computer and use it in GitHub Desktop.
Simple script for downloading all of a project's github issues into a PostgreSQL database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Usage: download_issues.py owner repo postgres_dsn github_token | |
# simple script for downloading all issues from the given repository | |
# requires you to set up an API token with no special permissions | |
# also requires a PostgreSQL database with the following tables: | |
""" | |
create table issues ( | |
issue_number int unique not null, | |
id bigint unique not null, | |
state text, | |
title text, | |
body text, | |
reporting_login text, | |
created_at timestamptz, | |
updated_at timestamptz, | |
assignee text, | |
raw jsonb | |
); | |
create table comments ( | |
issue_number int references issues(issue_number), | |
id bigint unique not null, | |
commenting_login text, | |
body text, | |
updated_at timestamptz, | |
raw jsonb | |
); | |
create index on comments(issue_number); | |
""" | |
import json | |
import requests | |
import github3 | |
import psycopg2 | |
import sys | |
if len(sys.argv) < 4: | |
print "Usage:\n download_issues.py owner repo postgres_dsn github_token" | |
sys.exit(1) | |
repo_owner = sys.argv[1] | |
repo = sys.argv[2] | |
pg_dsn = sys.argv[3] | |
api_token = sys.argv[4] | |
# connect to database | |
conn=psycopg2.connect(pg_dsn) | |
cur=conn.cursor() | |
# connect to github | |
gh = github3.login(token=api_token) | |
issue_count=1 | |
# loop through issues | |
for issue_obj in gh.iter_repo_issues(repo_owner,repo,state='open'): | |
# we're going to work with the raw json for most things | |
# because the github3 lib leaves out several fields we need | |
issue = issue_obj.to_json() | |
# save to database | |
if issue["assignee"]: | |
asignee = issue["assignee"]["login"] | |
try: | |
cur.execute("""INSERT INTO issues | |
VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )""", | |
(issue["number"], issue["id"], issue["state"], | |
issue["title"], issue["body"], issue["user"]["login"], | |
issue["created_at"], issue["updated_at"], asignee, | |
json.dumps(issue),)) | |
except Exception as e: | |
print 'inserting issue failed:' | |
print json.dumps(issue, sort_keys=True, indent=2) | |
print str(e) | |
sys.exit(3) | |
# get comments | |
for comment_obj in issue_obj.iter_comments(): | |
# again, get JSON to get the fields the object doesn't have | |
comment = comment_obj.to_json() | |
# save comments to database | |
try: | |
cur.execute("""INSERT INTO comments | |
VALUES (%s, %s, %s, %s, %s, %s)""", | |
(issue["number"],comment["id"],comment["user"]["login"], | |
comment["body"],comment["created_at"], json.dumps(comment),)) | |
except: | |
print 'inserting comment failed:' | |
print json.dumps(comment, sort_keys=True, indent=2) | |
sys.exit(3) | |
conn.commit() | |
issue_count += 1 | |
if ( issue_count % 10 ) == 0: | |
print "{0} issues downloaded".format(issue_count) | |
conn.close() | |
print "DONE: {0} issues downloaded".format(issue_count) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment