Skip to content

Instantly share code, notes, and snippets.

@sigmaris
Last active August 18, 2017 12:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sigmaris/673331f56057b1c9cd5b10c1167ed0e1 to your computer and use it in GitHub Desktop.
Save sigmaris/673331f56057b1c9cd5b10c1167ed0e1 to your computer and use it in GitHub Desktop.
Linter for any plain text files
#!/usr/bin/env python
import argparse
import fnmatch
import os
import re
import sys
# Indentation made of optional tabs, then one or more spaces, then at least
# one tab.  Group 1 captures the tab run that follows the spaces.
MIXED_TABS_SPACES = re.compile(r"^\t* +(\t+)")

# Group 1 is whatever precedes the trailing whitespace, group 2 is the trailing
# whitespace run itself.  Group 1 must be NON-greedy: a greedy ``(.*)`` swallows
# all but the last whitespace character, so group 2 would start at the last
# trailing character and group 1 would be non-empty for any whitespace-only
# line longer than one character.  ``$`` also matches just before a final
# newline, so lines may be passed with their line terminator attached.
TRAILING_WHITESPACE = re.compile(r"(.*?)([ \t\r\f\v]+)$")

# Byte values treated as "text": a handful of control characters
# (BEL, BS, TAB, LF, FF, CR, ESC) plus everything from 0x20 to 0xff except DEL.
TEXTCHARS = bytearray(
    {7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7f})
)
# Command-line interface: one positional directory to walk, plus any number
# of -e/--exclude glob patterns (collected into a list, empty by default).
parser = argparse.ArgumentParser(
    description='Lint general text files without parsing code.',
)
parser.add_argument('directory', help='directory to walk')
parser.add_argument(
    '-e', '--exclude',
    action='append',
    default=[],
    help='exclude this glob pattern',
)
# Parsed once at module level; the walk loop reads args.directory and
# args.exclude.
args = parser.parse_args()
def is_binary_string(bytes):
    """Return True if the given byte string contains any non-text byte.

    Every byte listed in TEXTCHARS is deleted from the input; if anything is
    left over, the data is considered binary.
    """
    non_text = bytes.translate(None, TEXTCHARS)
    return len(non_text) > 0
def mixed_indent_line(line):
    """Check a single line for spaces followed by tabs in its indentation.

    Returns a ``"<col>: E101 ..."`` message when MIXED_TABS_SPACES matches the
    line, where ``<col>`` is the 1-based column of the first tab that follows
    the spaces; returns None otherwise.
    """
    m = MIXED_TABS_SPACES.match(line)
    if m is None:
        return None
    # m.start() is a 0-based index, but the other checks in this file report
    # the first column as 1, so convert to a 1-based column here.
    return "{}: E101 indentation contains mixed spaces and tabs".format(
        m.start(1) + 1
    )
def trailing_whitespace(line):
    """Check for whitespace just before the end of a line.

    Returns None when TRAILING_WHITESPACE does not match.  When it matches and
    nothing precedes the matched whitespace (group 1 is empty) the result is
    the W293 "blank line" message at column 1; otherwise it is a W291 message
    whose column is the 1-based position where the matched whitespace group
    starts.
    """
    m = TRAILING_WHITESPACE.match(line)
    if m is None:
        return None
    if not m.group(1):
        return "1: W293 blank line contains whitespace"
    # m.start() is a 0-based index, but the other checks in this file report
    # the first column as 1, so convert to a 1-based column here.
    return "{}: W291 trailing whitespace".format(m.start(2) + 1)
def final_newline(line):
    """Check that the given (last) line of a file ends with a newline.

    Returns the W292 message when the line does not end in a newline
    character, and None when it does.
    """
    if line.endswith('\n'):
        return None
    return "1: W292 no newline at end of file"
def mixed_indent_file(line, file_indent_type):
    """Compare one line's indent character with the file's indent style.

    The line's style is 'space' or 'tab' according to its first character,
    or None when it starts with neither.  An E101 message is produced only
    when both the line's style and ``file_indent_type`` are known and differ.

    Returns a ``(line_indent_type, message_or_None)`` tuple.
    """
    line_indent_type = None
    for prefix, style in ((' ', 'space'), ('\t', 'tab')):
        if line.startswith(prefix):
            line_indent_type = style
            break

    # Nothing to compare until both the file's and the line's style are known.
    if file_indent_type is None or line_indent_type is None:
        return line_indent_type, None
    if line_indent_type == file_indent_type:
        return line_indent_type, None
    return line_indent_type, "1: E101 indentation contains mixed spaces and tabs"
def output(filename, line_no, result):
    """Print one finding as ``filename:line_no:result``.

    Nothing is printed when ``result`` is None, so check functions can be
    passed through unconditionally.
    """
    if result is None:
        return
    message = "{0}:{1}:{2}".format(filename, line_no, result)
    print(message)
# Walk the requested directory tree and lint every file that looks like text.
for dirpath, dirnames, filenames in os.walk(args.directory):
    for name in filenames:
        # Exclusion globs are matched against the bare file name, not the path.
        if any(fnmatch.fnmatch(name, exclude) for exclude in args.exclude):
            continue
        fullname = os.path.join(dirpath, name)
        with open(fullname, 'rb') as infile:
            # Sniff the first 1 KiB; skip anything that looks like binary data.
            if is_binary_string(infile.read(1024)):
                continue
            infile.seek(0)

            # line_no stays 0 for an empty file; last_line starts as a bare
            # newline so an empty file does not trigger the W292 check.
            line_no = 0
            last_line = '\n'
            # Indent style of the file, taken from the first indented line.
            file_indent_type = None
            for line_no, raw_line in enumerate(infile, 1):
                # The file is read in binary mode, so on Python 3 each line
                # is a bytes object, which the str-based checks reject with
                # TypeError.  latin-1 maps every byte to exactly one character,
                # so decoding never fails and offsets stay byte offsets.
                line = raw_line.decode('latin-1')
                line_indent_type, indent_result = mixed_indent_file(
                    line, file_indent_type
                )
                if file_indent_type is None:
                    file_indent_type = line_indent_type
                findings = (
                    indent_result,
                    mixed_indent_line(line),
                    trailing_whitespace(line),
                )
                for finding in findings:
                    output(infile.name, line_no, finding)
                last_line = line
            # Report a missing final newline on the last line itself;
            # line_no still holds the number of the last line read.
            output(infile.name, line_no, final_newline(last_line))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment