Skip to content

Instantly share code, notes, and snippets.

@jamesls
Last active August 29, 2015 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamesls/1b12afbbc56a1791bfd7 to your computer and use it in GitHub Desktop.
Save jamesls/1b12afbbc56a1791bfd7 to your computer and use it in GitHub Desktop.
PR Check Script
#!/usr/bin/env python
"""Script to do an intial review of a PR.
There are many things that a core dev will look at when reviewing a pull
request. Many of these things will require the expertise of someone who is
familiar with the codebase.
There are also things that can be entirely automated.
That's the point of this script. To get all the checks that can be automated,
well...automated, so that developers can focus on the parts of the review that
actually require a human to look at.
To simplify things, this script only works (or as has been tested) on
python 2.7.
How To Use
==========
The idea is that you can either run this script as a reviewer or as a
someone about to submit a PR. As a reviewer:
1. Pull down the remote/branch for the PR: hub checkout <pr link>
2. Compare against a base branch, 99.99% of the time this is develop.
3. Copy/paste the output into the PR
From the perspective of someone about to send a PR, run this script
before submitting the PR and ensure this script reports no errors.
This script will be running tests and checking out different
branches, so be sure that you've commited all the changes you want,
and that there are no pending changes!
"""
import sys
import argparse
try:
import flake8.main
import coverage
import sh
import pylint
import unidiff
import six
from flake8.engine import get_style_guide
from flake8.main import DEFAULT_CONFIG
from pep8 import DiffReport
except ImportError as e:
sys.stderr.write("Could not import module, please install: %s\n" % e)
sys.exit(1)
def check_flake8(context):
for filename in context.changed_files:
_check_flake8_single_file(filename, context)
def _check_flake8_single_file(filename, context):
options = {
'selected_lines': context.changed_files,
'reporter': DiffReport,
'diff': True,
}
flake8_style = get_style_guide(
config_file=DEFAULT_CONFIG, max_complexity=-1, **options)
sys.stdout = six.StringIO()
try:
report = flake8_style.check_files([filename])
finally:
sys.stdout = sys.__stdout__
for error_tuple in report._deferred_print:
context.record_error(report.filename, *error_tuple[:-1])
def check_pylint(context):
pass
def check_coverage(context):
verify_clean_git_tree(context)
print("Generating coverage data...")
try:
sh.nosetests('tests/unit', '--with-coverage', '--cover-erase',
'--cover-package', context.package_name)
except Exception as e:
raise RuntimeError(
"%s\nCould not generate coverage data, unit tests failed.\n" %
(e.stderr))
cov = coverage.coverage()
cov.load()
for filename in context.changed_files:
if filename.startswith('tests'):
continue
if not context.changed_files[filename]:
# There's no additions in this file, so we can skip it.
continue
report = cov.analysis(filename)
uncovered_lines = set(report[2])
changed_lines = set(context.changed_files[filename])
# This is a list of line numbers that were added but are missing
# test coverage.
lines_missing_tests = changed_lines.intersection(uncovered_lines)
if lines_missing_tests:
missing_lines_str = _pretty_print_int_range(lines_missing_tests)
context.record_error(
filename, missing_lines_str, -1,
'missing-coverage', 'Added lines have no test coverage.')
def _pretty_print_int_range(integer_set):
# Given a set of integers, create a CSV of ranges where consecutive
# its are collapsed.
lines = list(sorted(integer_set))
ranges = []
start_range = None
for el in lines:
if start_range is None:
start_range = el
last_range = el
else:
if el - 1 == last_range:
last_range = el
else:
# We've hit a range boundary
if start_range != last_range:
ranges.append('%s-%s' % (start_range, last_range))
else:
ranges.append(str(start_range))
start_range = el
last_range = el
if start_range != last_range:
ranges.append('%s-%s' % (start_range, last_range))
else:
ranges.append(str(start_range))
start_range = el
last_range = el
return ','.join(ranges)
def verify_clean_git_tree(context):
# Verify we can cleanly "git checkout" a branch. We can't have any
# modified files, etc.
files = [line.strip().split(None, 1) for line in
context.git.status('--short').splitlines()
if not line.strip().startswith('??')]
if files:
raise RuntimeError(
"You have uncommited changes. Please ensure you have a clean "
"working repository before running this command.")
def check_has_tests(context):
has_tests = any(
name.startswith('test') for name in context.changed_filenames)
# This won't catch things like fixing comments/docstrings, but for
# now we use a simple check.
has_code_changes = any(
name.endswith('.py') and not name.startswith('test')
for name in context.changed_filenames)
if has_code_changes and not has_tests:
context.record_error('<project>', 0, -1, 'missing-tests:',
'Diff does not have any tests.')
class Context(object):
def __init__(self):
# This is a dict of filename->list of line numbers added.
# If a filename is in this dict, it means it has changes.
# If the changed_file is associated with an empty list, it
# means the file was changed but has no added lines. Likely
# its diff was just deletions.
self.changed_files = {}
self.errors_by_file = {}
# Mapping of filename -> unified diff.
self.files_diff = {}
self.git = sh.git.bake(_tty_out=False)
self.package_name = ''
@property
def changed_filenames(self):
return list(self.changed_files)
def compute_diffs(self, base_branch):
changed_files = self.git.diff(
"--raw", "--name-only", '%s..' % base_branch).splitlines()
for filename in changed_files:
if not filename.endswith('.py'):
continue
output = self.git.diff('%s..' % base_branch, '--', filename)
self.files_diff[filename] = output
self.changed_files[filename] = self._get_changed_lines_for_file(
output)
def _get_changed_lines_for_file(self, git_diff_output):
parsed_diff = unidiff.PatchSet(git_diff_output)
added_line_numbers = []
for hunk in parsed_diff[0]:
hunk_list = list(hunk)
# Get a list of added line numbers in the target (current branch)
# file.
for el in hunk_list:
if el.is_added:
added_line_numbers.append(el.target_line_no)
return added_line_numbers
def record_error(self, filename, row, column, error_code, msg):
# column is 0 based but we need 1 based, hence the +1.
self.errors_by_file.setdefault(filename, []).append(
(row, column + 1, error_code, msg))
def report_errors(self):
for filename in self.errors_by_file:
for row, col, code, msg in self.errors_by_file[filename]:
print('%s:%s:%s: %s %s' % (filename, row, col, code, msg))
def review_pull_request(base_branch, project_name):
try:
context = Context()
context.package_name = project_name
context.compute_diffs(base_branch)
check_flake8(context)
check_pylint(context)
check_has_tests(context)
check_coverage(context)
context.report_errors()
except Exception as e:
print("%s: %s" % (e.__class__.__name__, e))
def main():
parser = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-b', '--base-branch', default='develop',
help='The starting to commit to compare against.')
parser.add_argument('-p', '--project-name', required=True,
help='The name of the project (i.e botocore, awscli).')
args = parser.parse_args()
review_pull_request(args.base_branch, args.project_name)
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment