Skip to content

Instantly share code, notes, and snippets.

@rdswift
Last active March 14, 2022 22:23
Show Gist options
  • Save rdswift/aea81a7382462116388f3984794d5a3f to your computer and use it in GitHub Desktop.
Save rdswift/aea81a7382462116388f3984794d5a3f to your computer and use it in GitHub Desktop.
Picard Developer Script to Check Files Using PyLint
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021-2022 Bob Swift
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import argparse
from collections import namedtuple
import datetime
import glob
import hashlib
import os
import subprocess
import sys
from pylint import epylint
##########################
# Script Information #
##########################
# --- Script information -----------------------------------------------------
SCRIPT_NAME = 'Picard Pylint Checker'
SCRIPT_VERS = '0.2'
SCRIPT_COPYRIGHT = '2021-2022'
SCRIPT_AUTHOR = 'Bob Swift'
PACKAGE_TITLE = 'MusicBrainz Picard'

# --- Settings ---------------------------------------------------------------
# Top-level directories walked when FILES is given as 'all'.
PYTHON_FILES_TO_CHECK = ['picard', 'test']

# Path prefixes (always written with forward slashes) that are never linted.
PYTHON_FILES_TO_SKIP = [
    'picard/ui/ui_',
    'test/data',
]

# Extra options appended to every pylint invocation.
PYLINT_OPTIONS = ' '.join((
    '--disable W0102',
    '--disable W0221',
))

# File recording the date and hash of every file that last passed linting.
LAST_GOOD_FILE = 'run_pylint_passed.txt'

# Maximum time allowed for external commands, in seconds.
COMMAND_TIMEOUT = 5 * 60
#######################
# Text to Display #
#######################
# One-line banner printed at the start of a run and reused in the other texts.
DESCRIPTION = "{0} (v{1})".format(SCRIPT_NAME, SCRIPT_VERS)
COPYRIGHT_TEXT = """\
{0} Copyright (C) {1} {2}
""".format(DESCRIPTION, SCRIPT_COPYRIGHT, SCRIPT_AUTHOR)
# Text shown for the --about option. The option name quoted here must match
# the one registered in parse_command_line() ('--warrantee').
ABOUT_TEXT = """\
{0}
This program provides the ability to perform a lint check on python
files to aid in the development of the {1} package.
For usage instructions, please use the '--help' option.
This program comes with ABSOLUTELY NO WARRANTY; for details use option
'--warrantee'. This is free software, and you are welcome to redistribute
it under certain conditions. Please see the GPLv2 license for details.
""".format(COPYRIGHT_TEXT, PACKAGE_TITLE)
# Text shown for the --warrantee option.
WARRANTY_TEXT = """\
{0}
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT
WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF
THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
If the disclaimer of warranty and limitation of liability provided above
cannot be given local legal effect according to their terms, reviewing
courts shall apply local law that most closely approximates an absolute
waiver of all civil liability in connection with the Program, unless a
warranty or assumption of liability accompanies a copy of the Program in
return for a fee.
""".format(COPYRIGHT_TEXT)
# Usage line built from the name of this script file as it exists on disk.
USAGE = """{0} [optional arguments] FILES""".format(os.path.basename(os.path.realpath(__file__)))
# Help text printed when no arguments or no FILES are supplied.
# NOTE(review): 'changed' and 'newer' are listed under FILES, but main() only
# special-cases 'all'; any other value is treated as a GLOB filespec. Confirm
# whether those two keywords are still meant to be supported.
HELP = """\
usage: {0}
FILES:
changed Lint changed files.
newer Lint newer files.
all Lint all files.
filespec Lint files matching GLOB 'filespec'. Enter multiple
filespecs by separating them with spaces.
Optional Arguments:
-h, --help Show this help message and exit.
-a, --about Show general information about the script.
-d, --docs Show documentation for the script.
-w, --warrantee Show warrantee information.
-f, --force Force re-processing unchanged files.
-g, --git Only process files appearing in 'git status'.
-v, --verbose Verbose output.
""".format(USAGE)
# Long-form documentation shown for the --docs option.
DOCUMENTATION_TEXT = """\
{0}
-----------------------
{1}
When the FILES parameter is specified as 'all', the script will check each
file in the 'picard' and 'test' subdirectories. Otherwise, the FILES
parameter will be expanded using the python GLOB functionality to produce
the list of files to be processed. Multiple GLOB filespecs can be entered
by separating them with spaces.
When the --git option is specified, the script will only process the files
also appearing in the list produced using the 'git status' command.
By default, files will first be checked against the values stored in the
'{2}' file to determine if the file should be processed.
If the hash value of the file has changed, then the file is deemed to be
newer and will be processed. This avoids re-processing files that have not
changed since the last time they passed testing. This can be overridden by
specifying the --force option to process all files specified.
After processing, the information stored in the '{2}' file
is updated automatically for each file processed.
""".format(ABOUT_TEXT, HELP, LAST_GOOD_FILE)
##########################
# Named Tuple Helper #
##########################
# Record kept for each file that passed linting: the file path, the
# modification time text and the content hash captured when it was checked.
CheckedStats = namedtuple('CheckedStats', 'path date hash')
##############################################################################
def parse_command_line():
    """Parse the command line arguments.

    When the script is started without any argument, the help text is
    printed and the script exits with code 1.

    Returns:
        argparse.Namespace: The boolean options ``about``, ``docs``,
        ``warrantee``, ``force``, ``git`` and ``verbose``, plus ``FILES``
        (list of str, possibly empty).
    """
    if len(sys.argv) == 1:
        print(HELP)
        exit_with_code(1)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-a', '--about',
        action='store_true',
        dest='about',
        help="show general information about the script",
    )
    arg_parser.add_argument(
        '-d', '--docs',
        action='store_true',
        dest='docs',
        help="show script documentation",
    )
    # '--warranty' is accepted as an alias because the "about" text refers to
    # the option by that spelling; both store into 'warrantee'.
    arg_parser.add_argument(
        '-w', '--warrantee', '--warranty',
        action='store_true',
        dest='warrantee',
        help="show warrantee information",
    )
    arg_parser.add_argument(
        '-f', '--force',
        action='store_true',
        dest='force',
        help="force re-processing unchanged files",
    )
    arg_parser.add_argument(
        '-g', '--git',
        action='store_true',
        dest='git',
        help="only process files appearing in 'git status'",
    )
    arg_parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help="verbose output",
    )
    arg_parser.add_argument(
        'FILES',
        type=str,
        nargs='*',
        help="specify files for processing ('all' or GLOB filespec)",
    )
    return arg_parser.parse_args()
##############################################################################
def get_hash(filepath):
    """Compute the MD5 digest of the contents of a file.

    Args:
        filepath (str): Path and file name of the file to process.

    Returns:
        str: The hexadecimal hash value for the file.
    """
    chunk_size = 65536  # read 64kb at a time instead of the whole file
    digest = hashlib.md5()
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
##############################################################################
def get_last_good(verbose_output=False):
    """Read the "last good" file information into a dictionary.

    The file is created empty when it does not exist yet. Every usable line
    holds three whitespace-separated fields: date, hash and file path (the
    path is the remainder of the line). Blank lines and lines with fewer
    than three fields are skipped instead of raising an IndexError.

    Args:
        verbose_output (bool): Determines output level.

    Returns:
        dict: Dictionary of CheckedStats tuples keyed by filepath.
    """
    last_good = {}
    if verbose_output:
        print("Reading the '{0}' file.".format(LAST_GOOD_FILE))

    # Opening in append mode creates the file if it is missing and leaves any
    # existing content untouched, so the read below cannot fail on a first run.
    with open(LAST_GOOD_FILE, 'a', encoding='utf8'):
        pass
    with open(LAST_GOOD_FILE, 'r', encoding='utf8') as f:
        lines = f.readlines()

    text = "Processing 'last good' entries."
    line_count = len(lines)
    for line_number, line in enumerate(lines, start=1):
        if verbose_output:
            print("\r{0} {1:>3}%".format(text, int(100 * line_number / line_count)), end='', flush=True)
        # maxsplit=2 keeps a path containing spaces together in the last field.
        parts = line.strip().split(maxsplit=2)
        if len(parts) < 3:
            # Blank or malformed line: no usable record.
            continue
        filedate, filehash, filepath = parts
        last_good[filepath] = CheckedStats(filepath, filedate, filehash)
    if verbose_output:
        print("\r{0} Complete ({1:,} entries).\n".format(text, len(last_good)), flush=True)
    return last_good
##############################################################################
def run_pylint(last_good, check_files):
    """Process the specified files using pylint.

    Each file is linted in turn. A file producing no pylint output is
    recorded in ``last_good``; a file producing output is counted as failed
    and its output is collected for the final report. The "last good" file
    is rewritten after all files have been processed. If any file failed,
    the collected output is printed and the script exits with the number of
    failed files (capped at 255) as exit code.

    Args:
        last_good (dict): Information from the "last good" file. Updated in
            place.
        check_files (list): List of files to check.
    """
    # NOTE(review): pylint.epylint was deprecated in pylint 2.16 and removed
    # in pylint 3.0 -- confirm the pylint version this script is run with.
    failed_count = 0
    error_reports = []
    file_count = len(check_files)
    for counter, filename in enumerate(check_files, start=1):
        print('File {0:>5,} of {1:,}: {2}'.format(counter, file_count, filename), end='', flush=True)
        testtime = datetime.datetime.fromtimestamp(os.path.getmtime(filename)).strftime('%Y-%m-%d_%H:%M:%S')
        testhash = get_hash(filepath=filename)
        pylint_stdout, _ = epylint.py_run("{0} {1}".format(filename, PYLINT_OPTIONS), return_std=True)

        report_lines = []
        for line in pylint_stdout.getvalue().split('\n'):
            # Everything from the dashed separator line onwards is ignored.
            if line.startswith(' ----------'):
                break
            # Drop blank lines that appear before the first message.
            if not line.strip() and not report_lines:
                continue
            report_lines.append(line)

        if report_lines:
            error_reports.append('\n' + '\n'.join(report_lines) + '\n')
            # Forget any earlier "passed" record so that a failing file is
            # not skipped as unchanged on the next run.
            last_good.pop(filename, None)
            print(" - ERROR")
            failed_count += 1
        else:
            last_good[filename] = CheckedStats(filename, testtime, testhash)
            print(" (OK)")

    # file_count is used here (not the loop counter) so that an empty
    # check_files list does not raise a NameError.
    print("\nProcessed {0} file{1}.{2}".format(file_count, '' if file_count == 1 else 's', ' ' * 79))

    with open(LAST_GOOD_FILE, 'w', encoding='utf8') as f:
        for key in sorted(last_good):
            f.write("{0} {1} {2}\n".format(last_good[key].date, last_good[key].hash, key))

    if failed_count:
        print("\nFiles with errors:\n{0}".format(''.join(error_reports)))
        # Process exit statuses are commonly limited to 0-255; capping keeps
        # 256 or more failures from wrapping around to a "success" status.
        exit_with_code(min(failed_count, 255))
##############################################################################
def python_files_to_check(verbose_output=False):
    """Build the list of all project python files that should be checked.

    Every directory named in PYTHON_FILES_TO_CHECK is walked recursively.
    Files ending in '.py' are kept unless their path (written with forward
    slashes) starts with one of the PYTHON_FILES_TO_SKIP prefixes.

    Args:
        verbose_output (bool): Determines output level.

    Returns:
        list: Path and name of files.
    """
    if verbose_output:
        print("Getting list of all project files.")

    excluded_prefixes = tuple(PYTHON_FILES_TO_SKIP)
    collected = []
    for root in PYTHON_FILES_TO_CHECK:
        for folder, _subfolders, names in os.walk(root):
            if verbose_output:
                print("\rReading: {0:<69}".format(folder[:69]), end='', flush=True)
            for name in names:
                if not name.endswith('.py'):
                    continue
                # Normalise Windows separators so prefixes compare uniformly.
                candidate = os.path.join(folder, name).replace('\\', '/')
                if not candidate.startswith(excluded_prefixes):
                    collected.append(candidate)

    if verbose_output:
        summary = 'Project files list complete ({0:,} entries).'.format(len(collected))
        print("\r{0:<79}\n".format(summary), flush=True)
    return collected
##############################################################################
def exit_with_code(exit_code=0):
    """Report the exit code on standard output and terminate the script.

    Args:
        exit_code (int): Exit code to use (default: 0).

    Raises:
        SystemExit: Always, carrying ``exit_code``.
    """
    message = 'Exit Code: {0}\n'.format(exit_code)
    print(message)
    sys.exit(exit_code)
##############################################################################
def _git_changed_python_files(verbose_output=False):
    """Return the python files listed by 'git status --porcelain'.

    Exits the script with code 1 when git cannot be started, times out or
    returns a non-zero exit status.

    Args:
        verbose_output (bool): Determines output level.

    Returns:
        set: File paths (with forward slashes) ending in '.py'.
    """
    if verbose_output:
        print("Running 'git status' to get updated file list.")
    text = "Processing 'git status' output."
    try:
        # An argument list is used so that no shell is involved.
        result = subprocess.run(
            ['git', 'status', '--porcelain'],
            check=False,
            capture_output=True,
            text=True,
            timeout=COMMAND_TIMEOUT,
        )
    except (OSError, subprocess.TimeoutExpired):
        # git is not installed / not executable, or it did not finish in time.
        result = None
    if result is None or result.returncode:
        print("There was a problem running 'git status'.\n")
        sys.exit(1)

    git_items = set()
    lines = result.stdout.splitlines()
    line_count = len(lines)
    for line_number, line in enumerate(lines, start=1):
        if verbose_output:
            print("\r{0} {1:>3}%".format(text, int(100 * line_number / line_count)), end='', flush=True)
        # Each line is "<status> <path>"; the second field is taken as the path.
        # NOTE(review): for renamed entries and for paths containing spaces
        # the second field is not the full current path -- confirm whether
        # these cases need to be supported.
        parts = line.strip().split(maxsplit=2)
        if len(parts) < 2:
            continue
        filename = parts[1]
        if not filename.endswith('.py'):
            continue
        git_items.add(filename.replace('\\', '/'))
    if verbose_output:
        print("\r{0} Complete ({1:,} items).\n".format(text, len(git_items)), flush=True)
    return git_items


def main():
    """Main part of script to execute.

    Handles the informational options (--about, --docs, --warrantee), builds
    the list of files to lint from the FILES arguments, optionally restricts
    it to files reported by 'git status' and to files whose hash differs from
    the "last good" record, then runs pylint on what remains.
    """
    args = parse_command_line()

    if args.about:
        print("{0}".format(ABOUT_TEXT))
        sys.exit(0)
    if args.docs:
        print("{0}".format(DOCUMENTATION_TEXT))
        sys.exit(0)
    if args.warrantee:
        print("{0}".format(WARRANTY_TEXT))
        sys.exit(0)

    print("\n{0}\n".format(DESCRIPTION))

    if not args.FILES:
        print("{0}".format(HELP))
        exit_with_code(1)

    git_filter = bool(args.git)
    force_check = bool(args.force)
    verbose_output = bool(args.verbose)

    last_good = get_last_good(verbose_output)

    # Description of the active filters, appended to the "Processing" message.
    filter_notes = []
    if git_filter:
        filter_notes.append("filtered by 'git status'")
    if force_check:
        filter_notes.append('(forced checking)')
    process_filter = ' '.join(filter_notes)

    git_items = _git_changed_python_files(verbose_output) if git_filter else set()

    if 'all' in args.FILES:
        file_list = python_files_to_check(verbose_output)
    else:
        file_list = [
            filename.replace('\\', '/')
            for filepath in args.FILES
            for filename in glob.glob(filepath, recursive=True)
            if filename.endswith('.py')
        ]
    file_list = list(dict.fromkeys(file_list))  # Remove duplicate items

    # The filters build new lists: removing items from a list while iterating
    # over it skips the element following each removal.
    if git_filter:
        file_list = [filename for filename in file_list if filename in git_items]
    if not force_check:
        file_list = [
            filename for filename in file_list
            if not (filename in last_good and get_hash(filepath=filename) == last_good[filename].hash)
        ]

    if not file_list:
        print("No files found to check.\n")
        exit_with_code(0)

    file_count = len(file_list)
    print("Processing {0:,} file{1}{2}{3}.".format(
        file_count,
        's' if file_count > 1 else '',
        ' ' if process_filter else '',
        process_filter,
    ))
    run_pylint(last_good, file_list)
    exit_with_code(0)
##############################################################################
# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment