Skip to content

Instantly share code, notes, and snippets.

@jberkus
Last active December 6, 2016 00:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jberkus/6bc54e11a81d9607ca58754db90d2ec7 to your computer and use it in GitHub Desktop.
Save jberkus/6bc54e11a81d9607ca58754db90d2ec7 to your computer and use it in GitHub Desktop.
Simple script for downloading all of a project's github issues into a PostgreSQL database
#!/usr/bin/env python
# Usage: download_issues.py owner repo postgres_dsn github_token
# simple script for downloading all issues from the given repository
# requires you to set up an API token with no special permissions
# also requires a PostgreSQL database with the following tables:
"""
create table issues (
issue_number int unique not null,
id bigint unique not null,
state text,
title text,
body text,
reporting_login text,
created_at timestamptz,
updated_at timestamptz,
assignee text,
raw jsonb
);
create table comments (
issue_number int references issues(issue_number),
id bigint unique not null,
commenting_login text,
body text,
updated_at timestamptz,
raw jsonb
);
create index on comments(issue_number);
"""
import json
import requests
import github3
import psycopg2
import sys
if len(sys.argv) < 4:
print "Usage:\n download_issues.py owner repo postgres_dsn github_token"
sys.exit(1)
repo_owner = sys.argv[1]
repo = sys.argv[2]
pg_dsn = sys.argv[3]
api_token = sys.argv[4]
# connect to database
conn=psycopg2.connect(pg_dsn)
cur=conn.cursor()
# connect to github
gh = github3.login(token=api_token)
issue_count=1
# loop through issues
for issue_obj in gh.iter_repo_issues(repo_owner,repo,state='open'):
# we're going to work with the raw json for most things
# because the github3 lib leaves out several fields we need
issue = issue_obj.to_json()
# save to database
if issue["assignee"]:
asignee = issue["assignee"]["login"]
try:
cur.execute("""INSERT INTO issues
VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s )""",
(issue["number"], issue["id"], issue["state"],
issue["title"], issue["body"], issue["user"]["login"],
issue["created_at"], issue["updated_at"], asignee,
json.dumps(issue),))
except Exception as e:
print 'inserting issue failed:'
print json.dumps(issue, sort_keys=True, indent=2)
print str(e)
sys.exit(3)
# get comments
for comment_obj in issue_obj.iter_comments():
# again, get JSON to get the fields the object doesn't have
comment = comment_obj.to_json()
# save comments to database
try:
cur.execute("""INSERT INTO comments
VALUES (%s, %s, %s, %s, %s, %s)""",
(issue["number"],comment["id"],comment["user"]["login"],
comment["body"],comment["created_at"], json.dumps(comment),))
except:
print 'inserting comment failed:'
print json.dumps(comment, sort_keys=True, indent=2)
sys.exit(3)
conn.commit()
issue_count += 1
if ( issue_count % 10 ) == 0:
print "{0} issues downloaded".format(issue_count)
conn.close()
print "DONE: {0} issues downloaded".format(issue_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment