Last active
August 18, 2017 12:03
-
-
Save sigmaris/673331f56057b1c9cd5b10c1167ed0e1 to your computer and use it in GitHub Desktop.
Linter for any plain text files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import fnmatch | |
import os | |
import re | |
import sys | |
# Indentation consisting of optional tabs, then one or more spaces, then at
# least one tab. Group 1 is the run of tabs that follows the spaces.
MIXED_TABS_SPACES = re.compile(r"^\t* +(\t+)")
# Group 1 is the line content and group 2 the whole run of trailing
# whitespace. Group 1 must be non-greedy: a greedy ".*" swallows all but the
# last whitespace character, so group 2 would never hold more than one
# character and a whitespace-only line would look like it had content.
TRAILING_WHITESPACE = re.compile(r"(.*?)([ \t\r\f\v]+)$")
# Byte values treated as text when sniffing for binary data: a handful of
# control characters (BEL, BS, TAB, LF, FF, CR, ESC) plus 0x20-0xFF except DEL.
TEXTCHARS = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7f})
# Command-line interface: one positional directory to walk, plus a repeatable
# -e/--exclude option that collects glob patterns into a list.
parser = argparse.ArgumentParser(
    description='Lint general text files without parsing code.',
)
parser.add_argument('directory', help='directory to walk')
parser.add_argument(
    '-e', '--exclude',
    default=[],
    action='append',
    help='exclude this glob pattern',
)
# Arguments are parsed as soon as the module is executed.
args = parser.parse_args()
def is_binary_string(bytes):
    """Return True when the byte string contains any non-text byte.

    Every byte listed in TEXTCHARS is deleted from the input; whatever is
    left over is a byte that is not considered text.
    """
    leftover = bytes.translate(None, TEXTCHARS)
    return len(leftover) > 0
def mixed_indent_line(line):
    """Check one line's indentation for tabs that follow spaces.

    Returns "<col>: E101 ..." when the indentation matches MIXED_TABS_SPACES
    (optional tabs, then spaces, then tabs), otherwise None. <col> is the
    1-based column of the first tab after the spaces, matching the 1-based
    "1:" columns used by the other checks in this file.
    """
    m = MIXED_TABS_SPACES.match(line)
    if m is None:
        return None
    # m.start(1) is a 0-based string index; reported columns are 1-based.
    column = m.start(1) + 1
    return "{}: E101 indentation contains mixed spaces and tabs".format(column)
def trailing_whitespace(line):
    """Check a single line for whitespace just before its end.

    `line` is one line of text, optionally terminated by a newline.

    Returns:
        "<col>: W291 trailing whitespace" when content is followed by
        whitespace, where <col> is the 1-based column of the first trailing
        whitespace character;
        "1: W293 blank line contains whitespace" when the line consists of
        whitespace only;
        None when there is no trailing whitespace.
    """
    # The line terminator itself is not trailing whitespace.
    body = line[:-1] if line.endswith('\n') else line
    stripped = body.rstrip(' \t\r\f\v')
    if len(stripped) == len(body):
        return None
    if stripped:
        # len(stripped) is the 0-based index of the first trailing whitespace
        # character; reported columns are 1-based.
        return "{}: W291 trailing whitespace".format(len(stripped) + 1)
    return "1: W293 blank line contains whitespace"
def final_newline(line):
    """Report W292 when the given (last) line of a file lacks a newline.

    Returns the message string, or None when the line ends with a newline.
    """
    ends_properly = line.endswith('\n')
    return None if ends_properly else "1: W292 no newline at end of file"
def mixed_indent_file(line, file_indent_type):
    """Compare a line's leading indent character with the file's indent style.

    Returns a pair (line_indent_type, result):
      * line_indent_type is 'space' or 'tab' according to the line's first
        character, or None when the line starts with neither;
      * result is an E101 message when both the file's and the line's indent
        types are known and they differ, otherwise None.
    """
    if line.startswith(' '):
        indent = 'space'
    elif line.startswith('\t'):
        indent = 'tab'
    else:
        # Unindented line: nothing to compare.
        return None, None

    # No style established yet, or the line agrees with it.
    if file_indent_type is None or indent == file_indent_type:
        return indent, None

    return indent, "1: E101 indentation contains mixed spaces and tabs"
def output(filename, line_no, result):
    """Print one finding as "<filename>:<line_no>:<result>".

    Nothing is printed when `result` is None, i.e. the check found nothing.
    """
    if result is None:
        return
    report = "{0}:{1}:{2}".format(filename, line_no, result)
    print(report)
# Walk the directory tree and lint every file that is neither excluded nor
# detected as binary.
for dirpath, dirnames, filenames in os.walk(args.directory):
    for name in filenames:
        fullname = os.path.join(dirpath, name)
        # Exclude patterns are matched against the bare file name, not the path.
        if any(fnmatch.fnmatch(name, exclude) for exclude in args.exclude):
            continue
        # Binary mode lets the first 1024 bytes be sniffed for non-text
        # content and keeps line splitting on '\n' only.
        with open(fullname, 'rb') as infile:
            if is_binary_string(infile.read(1024)):
                continue
            infile.seek(0)
            line_no = 0
            # Starting from '\n' means an empty file does not trigger W292.
            last_line = '\n'
            file_indent_type = None
            for line_no, raw_line in enumerate(infile, 1):
                # The checks work on text. latin-1 maps every byte to exactly
                # one character, so decoding never fails and column offsets
                # stay equal to byte offsets.
                line = raw_line.decode('latin-1')
                line_indent_type, file_result = mixed_indent_file(line, file_indent_type)
                # The first indented line establishes the file's indent style.
                if file_indent_type is None:
                    file_indent_type = line_indent_type
                results = (
                    file_result,
                    mixed_indent_line(line),
                    trailing_whitespace(line),
                )
                for result in results:
                    output(infile.name, line_no, result)
                last_line = line
            # line_no still holds the number of the last line read, which is
            # the line that is missing its newline.
            output(infile.name, line_no, final_newline(last_line))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment