Skip to content

Instantly share code, notes, and snippets.

@senko
Created July 12, 2019 09:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save senko/20af8db7a55afde8bef32d7598988cf6 to your computer and use it in GitHub Desktop.
Save senko/20af8db7a55afde8bef32d7598988cf6 to your computer and use it in GitHub Desktop.
Backup GitHub issues from a single repository
#!/usr/bin/env python
#
# Backs up all issues (open and closed) from a specified repository, together with their comments.
# Uses GitHub API v3 and requires a valid API token.
#
# Installation:
# python3 -m venv /path/to/virtualenv
# source /path/to/virtualenv/bin/activate
# pip install requests
# Usage:
# github-issue-backup.py <api_token> <owner/repo> <output_dir>
#
# This will save the list of all issues in <output_dir>/all-issues.json (creating the directory
# if needed), and each issue separately in <output_dir>/issue-<number>.json (together with comments).
import os.path
import os
import sys
import requests
import json
# Base URL of the GitHub API repository endpoints; the "<owner>/<repo>" path is appended to it.
API_BASE = 'https://api.github.com/repos/'
def make_backup(api_token, repo, output_dir):
    """Back up every issue of a GitHub repository, with comments, as JSON files.

    The issue list is stored in ``<output_dir>/all-issues.json`` and each
    issue, with its comments under the ``comments`` key, in
    ``<output_dir>/issue-<number>.json``. Files that already exist are treated
    as a cache and are not downloaded again, so an interrupted run can be
    resumed by invoking the function again with the same arguments.

    Args:
        api_token: GitHub API token, sent in the ``authorization`` header.
        repo: Repository in ``owner/repo`` form.
        output_dir: Existing directory the JSON files are written to.

    Raises:
        requests.HTTPError: If GitHub answers a request with an error status.
    """
    # Largest page size the GitHub API accepts; fewer pages mean fewer requests.
    per_page = 100

    def get(url, **params):
        # Fail loudly on HTTP errors: GitHub reports errors as a JSON object
        # rather than a list, and treating that as a page of results would
        # corrupt the backup and keep the paging loop running forever.
        response = requests.get(
            url,
            params=params,
            headers=dict(authorization='Token ' + api_token),
        )
        response.raise_for_status()
        return response.json()

    def pages(url, **params):
        # Yield each non-empty page of a paginated list endpoint in order.
        page = 1
        while True:
            batch = get(url, per_page=per_page, page=page, **params)
            if batch:
                yield batch
            # A short (or empty) page is the last one; stopping here avoids
            # one extra request per listing.
            if len(batch) < per_page:
                return
            page += 1

    def save_json(fname, data):
        # Write to a temporary name and rename into place, so an interrupted
        # run never leaves a truncated file that a later run would mistake
        # for a complete cached copy.
        tmp_fname = fname + '.tmp'
        with open(tmp_fname, 'w') as fp:
            json.dump(data, fp)
        os.replace(tmp_fname, fname)

    all_issues_fname = os.path.join(output_dir, 'all-issues.json')
    if os.path.exists(all_issues_fname):
        with open(all_issues_fname) as fp:
            issues = json.load(fp)
        sys.stdout.write("Loaded cached issue list, %d issues found\n" % len(issues))
    else:
        issues = []
        sys.stdout.write("Downloading issue list ")
        sys.stdout.flush()
        for batch in pages(API_BASE + repo + '/issues', state='all'):
            issues.extend(batch)
            sys.stdout.write('.')
            sys.stdout.flush()
        sys.stdout.write(" done, found %d issues\n" % len(issues))
        sys.stdout.flush()
        save_json(all_issues_fname, issues)

    sys.stdout.write("Loading issue comments ")
    sys.stdout.flush()
    for issue in issues:
        issue_fname = os.path.join(output_dir, 'issue-%d.json' % issue['number'])
        if os.path.exists(issue_fname):
            # Already backed up by an earlier run.
            sys.stdout.write('s')
            sys.stdout.flush()
            continue
        # Comment listings are paginated too; fetch every page so that long
        # threads are not silently truncated to the first page.
        comments = []
        for batch in pages(issue['comments_url']):
            comments.extend(batch)
        issue['comments'] = comments
        save_json(issue_fname, issue)
        sys.stdout.write('.')
        sys.stdout.flush()
    sys.stdout.write(" done\n")
    sys.stdout.flush()
def run():
    """Command-line entry point: validate ``sys.argv`` and start the backup.

    Expects exactly three arguments: ``<api_token> <owner/repo> <output_dir>``.
    The output directory, including any missing parent directories, is created
    if it does not exist. On invalid arguments a message is written to stderr
    and the process exits via ``sys.exit(-1)``.
    """
    if len(sys.argv) != 4:
        sys.stderr.write("Usage: %s <api_token> <owner/repo> <output_dir>\n" % sys.argv[0])
        sys.exit(-1)

    api_token, repo, output_dir = sys.argv[1:]

    # Require exactly one slash with a non-empty name on each side, so that
    # values such as "owner/" or "a/b/c" are rejected before any request.
    owner, _, name = repo.partition('/')
    if not owner or not name or '/' in name:
        sys.stderr.write("Repository should be in format <owner/repo>\n")
        sys.exit(-1)

    try:
        # Creates missing parent directories as well, and is a no-op when the
        # directory already exists.
        os.makedirs(output_dir, exist_ok=True)
    except (FileExistsError, NotADirectoryError):
        # The path, or one of its parents, exists but is not a directory.
        sys.stderr.write("%s is not a directory\n" % output_dir)
        sys.exit(-1)

    make_backup(api_token, repo, output_dir)
# Run the backup only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment