Skip to content

Instantly share code, notes, and snippets.

@neilernst
Created December 2, 2016 17:47
Show Gist options
  • Save neilernst/3201822719f610f7732194aba9d108b3 to your computer and use it in GitHub Desktop.
Save neilernst/3201822719f610f7732194aba9d108b3 to your computer and use it in GitHub Desktop.
Python 3 script that takes a directory of GitHub issues and finds the files changed by their related commits
# Retrieve, for each issue in a list, the associated commits and the files they changed.
# First run Octohub's offline-issues plugin: https://github.com/turnkeylinux/octohub/tree/master/contrib/offline-issues
# Assumes the relevant issue data is stored in Octohub's "all" directory.
# Author: @neilernst
# Copyright: SEI
# Licence: BSD
import logging, os, sys
import simplejson as json
from octohub.connection import Connection
from octohub.exceptions import ResponseError
# INFO is the level of this script's progress messages; raise it to
# logging.WARNING to hide them and keep only the error reports.
logging.basicConfig(level=logging.INFO)

# API path prefix of the repository to mine; change this to the project you care about.
repo_fqn = '/repos/eclipse/che'

# Optional API token, read from the environment; export it before starting the script.
OCTOHUB_TOKEN = os.getenv('OCTOHUB_TOKEN')

# Directory the issue JSON files are read from.
issues_dir = 'che/all'
# Directory that receives one <issue_id>.json file per issue.
out_dir = 'che/out'
def commit_shas_from_events(events):
    """Return the commit SHAs referenced by an issue's events, in event order.

    Each event is read through its ``commit_id`` attribute; events whose
    ``commit_id`` is falsy (no commit attached) are left out.
    """
    return [event.commit_id for event in events if event.commit_id]


def changed_filenames(commit):
    """Return the ``filename`` of every entry in ``commit['files']``."""
    return [git_file['filename'] for git_file in commit['files']]


def fetch_commit(conn, commit_sha):
    """Fetch one commit of ``repo_fqn`` through ``conn``.

    Returns the parsed response, or ``None`` when the API answers
    "Not Found" (e.g. the commit has been rebased or otherwise disappeared).
    Any other ResponseError terminates the script with exit status 1.
    """
    logging.info("looking for commit " + commit_sha)
    uri = repo_fqn + '/commits/' + commit_sha
    try:
        response = conn.send('GET', uri)
    except ResponseError as err:
        logging.error("Error occurred while trying " + uri)
        if err.args[0]['message'] == "Not Found":
            logging.error("URL " + uri + " returned 404 Not Found.")
            return None
        # Non-zero status so callers/shells can tell the run failed.
        sys.exit(1)
    return response.parsed


def collect_commit_files(commit_shas, fetch):
    """Map each commit SHA to the list of file names that commit changed.

    ``fetch`` is called with one SHA and must return the parsed commit, or
    ``None`` when the commit cannot be found. Missing commits are skipped so
    that one vanished commit does not discard the remaining ones.
    """
    commit_files = {}
    for commit_sha in commit_shas:
        commit = fetch(commit_sha)
        if commit is None:
            continue
        commit_files[commit_sha] = changed_filenames(commit)
    return commit_files


def dump_commit_files(out_path, commit_files):
    """Write ``commit_files`` as JSON to ``out_path``, replacing old content.

    The file is truncated rather than appended to: appending on a re-run
    would leave two JSON documents in one file, which no longer parses.
    """
    with open(out_path, 'w') as out_json:
        json.dump(commit_files, out_json)


def process_issue(conn, jfile):
    """Look up the commits tied to one stored issue and save their file lists.

    ``jfile`` is the name of a file in ``issues_dir``; it is also used as the
    issue id in the API path and as the stem of the output file name.
    Nothing is written when the issue has no commits. A ResponseError while
    fetching the events terminates the script with exit status 1.
    """
    logging.info('Reading issue # ' + jfile)
    # Parse the stored issue so a corrupt file fails before any API call is
    # spent on it. Its content is not used further: the request path is
    # rebuilt from repo_fqn and the file name.
    with open(os.path.join(issues_dir, jfile), 'r') as issue_json:
        json.load(issue_json)

    events_url = repo_fqn + '/issues/' + jfile + '/events'
    try:
        # NOTE(review): a single GET is issued; if the API paginates events,
        # only the first page is seen -- confirm whether that matters.
        response = conn.send('GET', events_url)
    except ResponseError as err:
        logging.error("Response error: {0}".format(err))
        logging.error("Error occurred while trying " + events_url)
        sys.exit(1)

    # Each issue has events, some of which have commits associated; each
    # such commit has a SHA id and a list of files.
    commit_files = collect_commit_files(
        commit_shas_from_events(response.parsed),
        lambda commit_sha: fetch_commit(conn, commit_sha),
    )
    if not commit_files:
        logging.info("Skipping issue #" + jfile + ", no commits")
        return
    dump_commit_files(os.path.join(out_dir, jfile + '.json'), commit_files)


def main():
    """Process every issue file found in ``issues_dir``."""
    # One connection serves all requests: 5000 queries/h with a token,
    # 60 queries/h without.
    conn = Connection(OCTOHUB_TOKEN) if OCTOHUB_TOKEN else Connection()
    # Make sure the output directory exists before any API call is made.
    os.makedirs(out_dir, exist_ok=True)
    for jfile in os.listdir(path=issues_dir):
        process_issue(conn, jfile)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment