jamesls/pr-check

## pr-check
#!/usr/bin/env python
"""Script to do an intial review of a PR.

There are many things that a core dev will look at when reviewing a pull
request.  Many of these things will require the expertise of someone who is
familiar with the codebase.

There are also things that can be entirely automated.

That's the point of this script.  To get all the checks that can be automated,
well...automated, so that developers can focus on the parts of the review that
actually require a human to look at.

To simplify things, this script only works (or as has been tested) on
python 2.7.

How To Use
==========

The idea is that you can either run this script as a reviewer or as a
someone about to submit a PR.  As a reviewer:

1. Pull down the remote/branch for the PR: hub checkout <pr link>
2. Compare against a base branch, 99.99% of the time this is develop.
3. Copy/paste the output into the PR

From the perspective of someone about to send a PR, run this script
before submitting the PR and ensure this script reports no errors.

This script will be running tests and checking out different
branches, so be sure that you've commited all the changes you want,
and that there are no pending changes!

"""
import sys
import argparse

try:
    import flake8.main
    import coverage
    import sh
    import pylint
    import unidiff
    import six
    from flake8.engine import get_style_guide
    from flake8.main import DEFAULT_CONFIG
    from pep8 import DiffReport
except ImportError as e:
    sys.stderr.write("Could not import module, please install: %s\n" % e)
    sys.exit(1)


def check_flake8(context):
    for filename in context.changed_files:
        _check_flake8_single_file(filename, context)


def _check_flake8_single_file(filename, context):
    options = {
        'selected_lines': context.changed_files,
        'reporter': DiffReport,
        'diff': True,
    }
    flake8_style = get_style_guide(
        config_file=DEFAULT_CONFIG, max_complexity=-1, **options)
    sys.stdout = six.StringIO()
    try:
        report = flake8_style.check_files([filename])
    finally:
        sys.stdout = sys.__stdout__
    for error_tuple in report._deferred_print:
        context.record_error(report.filename, *error_tuple[:-1])


def check_pylint(context):
    pass


def check_coverage(context):
    verify_clean_git_tree(context)
    print("Generating coverage data...")
    try:
        sh.nosetests('tests/unit', '--with-coverage', '--cover-erase',
                    '--cover-package', context.package_name)
    except Exception as e:
        raise RuntimeError(
            "%s\nCould not generate coverage data, unit tests failed.\n" %
            (e.stderr))
    cov = coverage.coverage()
    cov.load()
    for filename in context.changed_files:
        if filename.startswith('tests'):
            continue
        if not context.changed_files[filename]:
            # There's no additions in this file, so we can skip it.
            continue
        report = cov.analysis(filename)
        uncovered_lines = set(report[2])
        changed_lines = set(context.changed_files[filename])
        # This is a list of line numbers that were added but are missing
        # test coverage.
        lines_missing_tests = changed_lines.intersection(uncovered_lines)
        if lines_missing_tests:
            missing_lines_str = _pretty_print_int_range(lines_missing_tests)
            context.record_error(
                filename, missing_lines_str, -1,
                'missing-coverage', 'Added lines have no test coverage.')


def _pretty_print_int_range(integer_set):
    # Given a set of integers, create a CSV of ranges where consecutive
    # its are collapsed.
    lines = list(sorted(integer_set))
    ranges = []
    start_range = None
    for el in lines:
        if start_range is None:
            start_range = el
            last_range = el
        else:
            if el - 1 == last_range:
                last_range = el
            else:
                # We've hit a range boundary
                if start_range != last_range:
                    ranges.append('%s-%s' % (start_range, last_range))
                else:
                    ranges.append(str(start_range))
                start_range = el
                last_range = el
    if start_range != last_range:
        ranges.append('%s-%s' % (start_range, last_range))
    else:
        ranges.append(str(start_range))
    start_range = el
    last_range = el
    return ','.join(ranges)


def verify_clean_git_tree(context):
    # Verify we can cleanly "git checkout" a branch.  We can't have any
    # modified files, etc.
    files = [line.strip().split(None, 1) for line in
             context.git.status('--short').splitlines()
             if not line.strip().startswith('??')]
    if files:
        raise RuntimeError(
            "You have uncommited changes.  Please ensure you have a clean "
            "working repository before running this command.")


def check_has_tests(context):
    has_tests = any(
        name.startswith('test') for name in context.changed_filenames)
    # This won't catch things like fixing comments/docstrings, but for
    # now we use a simple check.
    has_code_changes = any(
        name.endswith('.py') and not name.startswith('test')
        for name in context.changed_filenames)
    if has_code_changes and not has_tests:
        context.record_error('<project>', 0, -1, 'missing-tests:',
                             'Diff does not have any tests.')


class Context(object):
    def __init__(self):
        # This is a dict of filename->list of line numbers added.
        # If a filename is in this dict, it means it has changes.
        # If the changed_file is associated with an empty list, it
        # means the file was changed but has no added lines.  Likely
        # its diff was just deletions.
        self.changed_files = {}
        self.errors_by_file = {}
        # Mapping of filename -> unified diff.
        self.files_diff = {}
        self.git = sh.git.bake(_tty_out=False)
        self.package_name = ''

    @property
    def changed_filenames(self):
        return list(self.changed_files)

    def compute_diffs(self, base_branch):
        changed_files = self.git.diff(
            "--raw", "--name-only", '%s..' % base_branch).splitlines()
        for filename in changed_files:
            if not filename.endswith('.py'):
                continue
            output = self.git.diff('%s..' % base_branch, '--', filename)
            self.files_diff[filename] = output
            self.changed_files[filename] = self._get_changed_lines_for_file(
                output)

    def _get_changed_lines_for_file(self, git_diff_output):
        parsed_diff = unidiff.PatchSet(git_diff_output)
        added_line_numbers = []
        for hunk in parsed_diff[0]:
            hunk_list = list(hunk)
            # Get a list of added line numbers in the target (current branch)
            # file.
            for el in hunk_list:
                if el.is_added:
                    added_line_numbers.append(el.target_line_no)
        return added_line_numbers

    def record_error(self, filename, row, column, error_code, msg):
        # column is 0 based but we need 1 based, hence the +1.
        self.errors_by_file.setdefault(filename, []).append(
            (row, column + 1, error_code, msg))

    def report_errors(self):
        for filename in self.errors_by_file:
            for row, col, code, msg in self.errors_by_file[filename]:
                print('%s:%s:%s: %s %s' % (filename, row, col, code, msg))


def review_pull_request(base_branch, project_name):
    try:
        context = Context()
        context.package_name = project_name
        context.compute_diffs(base_branch)
        check_flake8(context)
        check_pylint(context)
        check_has_tests(context)
        check_coverage(context)
        context.report_errors()
    except Exception as e:
        print("%s: %s" % (e.__class__.__name__, e))


def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-b', '--base-branch', default='develop',
                        help='The starting to commit to compare against.')
    parser.add_argument('-p', '--project-name', required=True,
                        help='The name of the project (i.e botocore, awscli).')
    args = parser.parse_args()
    review_pull_request(args.base_branch, args.project_name)


main()
	#!/usr/bin/env python
	"""Script to do an intial review of a PR.

	There are many things that a core dev will look at when reviewing a pull
	request. Many of these things will require the expertise of someone who is
	familiar with the codebase.

	There are also things that can be entirely automated.

	That's the point of this script. To get all the checks that can be automated,
	well...automated, so that developers can focus on the parts of the review that
	actually require a human to look at.

	To simplify things, this script only works (or as has been tested) on
	python 2.7.

	How To Use
	==========

	The idea is that you can either run this script as a reviewer or as a
	someone about to submit a PR. As a reviewer:

	1. Pull down the remote/branch for the PR: hub checkout <pr link>
	2. Compare against a base branch, 99.99% of the time this is develop.
	3. Copy/paste the output into the PR

	From the perspective of someone about to send a PR, run this script
	before submitting the PR and ensure this script reports no errors.

	This script will be running tests and checking out different
	branches, so be sure that you've commited all the changes you want,
	and that there are no pending changes!

	"""
	import sys
	import argparse

	try:
	import flake8.main
	import coverage
	import sh
	import pylint
	import unidiff
	import six
	from flake8.engine import get_style_guide
	from flake8.main import DEFAULT_CONFIG
	from pep8 import DiffReport
	except ImportError as e:
	sys.stderr.write("Could not import module, please install: %s\n" % e)
	sys.exit(1)


	def check_flake8(context):
	for filename in context.changed_files:
	_check_flake8_single_file(filename, context)


	def _check_flake8_single_file(filename, context):
	options = {
	'selected_lines': context.changed_files,
	'reporter': DiffReport,
	'diff': True,
	}
	flake8_style = get_style_guide(
	config_file=DEFAULT_CONFIG, max_complexity=-1, **options)
	sys.stdout = six.StringIO()
	try:
	report = flake8_style.check_files([filename])
	finally:
	sys.stdout = sys.__stdout__
	for error_tuple in report._deferred_print:
	context.record_error(report.filename, *error_tuple[:-1])


	def check_pylint(context):
	pass


	def check_coverage(context):
	verify_clean_git_tree(context)
	print("Generating coverage data...")
	try:
	sh.nosetests('tests/unit', '--with-coverage', '--cover-erase',
	'--cover-package', context.package_name)
	except Exception as e:
	raise RuntimeError(
	"%s\nCould not generate coverage data, unit tests failed.\n" %
	(e.stderr))
	cov = coverage.coverage()
	cov.load()
	for filename in context.changed_files:
	if filename.startswith('tests'):
	continue
	if not context.changed_files[filename]:
	# There's no additions in this file, so we can skip it.
	continue
	report = cov.analysis(filename)
	uncovered_lines = set(report[2])
	changed_lines = set(context.changed_files[filename])
	# This is a list of line numbers that were added but are missing
	# test coverage.
	lines_missing_tests = changed_lines.intersection(uncovered_lines)
	if lines_missing_tests:
	missing_lines_str = _pretty_print_int_range(lines_missing_tests)
	context.record_error(
	filename, missing_lines_str, -1,
	'missing-coverage', 'Added lines have no test coverage.')


	def _pretty_print_int_range(integer_set):
	# Given a set of integers, create a CSV of ranges where consecutive
	# its are collapsed.
	lines = list(sorted(integer_set))
	ranges = []
	start_range = None
	for el in lines:
	if start_range is None:
	start_range = el
	last_range = el
	else:
	if el - 1 == last_range:
	last_range = el
	else:
	# We've hit a range boundary
	if start_range != last_range:
	ranges.append('%s-%s' % (start_range, last_range))
	else:
	ranges.append(str(start_range))
	start_range = el
	last_range = el
	if start_range != last_range:
	ranges.append('%s-%s' % (start_range, last_range))
	else:
	ranges.append(str(start_range))
	start_range = el
	last_range = el
	return ','.join(ranges)


	def verify_clean_git_tree(context):
	# Verify we can cleanly "git checkout" a branch. We can't have any
	# modified files, etc.
	files = [line.strip().split(None, 1) for line in
	context.git.status('--short').splitlines()
	if not line.strip().startswith('??')]
	if files:
	raise RuntimeError(
	"You have uncommited changes. Please ensure you have a clean "
	"working repository before running this command.")


	def check_has_tests(context):
	has_tests = any(
	name.startswith('test') for name in context.changed_filenames)
	# This won't catch things like fixing comments/docstrings, but for
	# now we use a simple check.
	has_code_changes = any(
	name.endswith('.py') and not name.startswith('test')
	for name in context.changed_filenames)
	if has_code_changes and not has_tests:
	context.record_error('<project>', 0, -1, 'missing-tests:',
	'Diff does not have any tests.')


	class Context(object):
	def __init__(self):
	# This is a dict of filename->list of line numbers added.
	# If a filename is in this dict, it means it has changes.
	# If the changed_file is associated with an empty list, it
	# means the file was changed but has no added lines. Likely
	# its diff was just deletions.
	self.changed_files = {}
	self.errors_by_file = {}
	# Mapping of filename -> unified diff.
	self.files_diff = {}
	self.git = sh.git.bake(_tty_out=False)
	self.package_name = ''

	@property
	def changed_filenames(self):
	return list(self.changed_files)

	def compute_diffs(self, base_branch):
	changed_files = self.git.diff(
	"--raw", "--name-only", '%s..' % base_branch).splitlines()
	for filename in changed_files:
	if not filename.endswith('.py'):
	continue
	output = self.git.diff('%s..' % base_branch, '--', filename)
	self.files_diff[filename] = output
	self.changed_files[filename] = self._get_changed_lines_for_file(
	output)

	def _get_changed_lines_for_file(self, git_diff_output):
	parsed_diff = unidiff.PatchSet(git_diff_output)
	added_line_numbers = []
	for hunk in parsed_diff[0]:
	hunk_list = list(hunk)
	# Get a list of added line numbers in the target (current branch)
	# file.
	for el in hunk_list:
	if el.is_added:
	added_line_numbers.append(el.target_line_no)
	return added_line_numbers

	def record_error(self, filename, row, column, error_code, msg):
	# column is 0 based but we need 1 based, hence the +1.
	self.errors_by_file.setdefault(filename, []).append(
	(row, column + 1, error_code, msg))

	def report_errors(self):
	for filename in self.errors_by_file:
	for row, col, code, msg in self.errors_by_file[filename]:
	print('%s:%s:%s: %s %s' % (filename, row, col, code, msg))


	def review_pull_request(base_branch, project_name):
	try:
	context = Context()
	context.package_name = project_name
	context.compute_diffs(base_branch)
	check_flake8(context)
	check_pylint(context)
	check_has_tests(context)
	check_coverage(context)
	context.report_errors()
	except Exception as e:
	print("%s: %s" % (e.__class__.__name__, e))


	def main():
	parser = argparse.ArgumentParser(description=__doc__,
	formatter_class=argparse.RawTextHelpFormatter)
	parser.add_argument('-b', '--base-branch', default='develop',
	help='The starting to commit to compare against.')
	parser.add_argument('-p', '--project-name', required=True,
	help='The name of the project (i.e botocore, awscli).')
	args = parser.parse_args()
	review_pull_request(args.base_branch, args.project_name)


	main()