Skip to content

Instantly share code, notes, and snippets.

@AntumDeluge
Last active March 15, 2022 11:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AntumDeluge/fbf943541e6f6be50c7c0c155dd07ab0 to your computer and use it in GitHub Desktop.
Save AntumDeluge/fbf943541e6f6be50c7c0c155dd07ab0 to your computer and use it in GitHub Desktop.
Script to clean leading & trailing whitespace in text files.
#!/usr/bin/env python
## The MIT License (MIT)
#
# Copyright © 2022 Jordan Irwin (AntumDeluge)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
## SOFTWARE.
import os, sys, errno, platform, codecs, traceback
from enum import Enum
## Converts a value to a string.
#
# Tuples & lists are rendered element by element; any other value is passed
# through <code>str()</code>.
#
# @param t
#     Value to be converted.
# @param[opt] delim
#     Separator placed between the elements of a tuple or list. If
#     <code>None</code> (default), elements are separated with "," & wrapped
#     in "()" for tuples or "[]" for lists.
# @return
#     String representation of <code>t</code>.
def toString(t, delim=None):
    if not isinstance(t, (tuple, list)):
        # always hand back a string so callers can safely concatenate the result
        return str(t)

    parts = [str(v) for v in t]
    if delim is not None:
        return delim.join(parts)

    # joining (rather than checking whether the buffer still equals the
    # opening bracket) keeps the separator when the first element renders as ""
    opening, closing = "[]" if isinstance(t, list) else "()"
    return opening + ",".join(parts) + closing
## Debugging output level.
#
# Members are ordered by verbosity: log() prints a message only when its level
# does not exceed the configured level, & SILENT suppresses all output.
class Level(Enum):
    # no output at all
    SILENT = 0
    # errors only
    ERROR = 1
    # errors & warnings
    WARN = 2
    # regular messages (default level)
    INFO = 3
    # per-line/per-file details
    DEBUG = 4
    # highest level
    VERBOSE = 5
## Message printing/logging.
#
# Messages whose level exceeds the configured level (options["level"]) are
# dropped. Errors are written to stderr with an "ERROR: " prefix, warnings to
# stdout with a "WARNING: " prefix & everything else to stdout unchanged.
#
# @param msg
#     Message to be printed to console. If <code>None</code>, an empty line
#     is written to stdout instead.
# @param lvl
#     Debugging level (default: INFO). Either a Level member or its integer
#     value.
def log(msg=None, lvl=Level.INFO):
    # levels may be given as Level members or as plain integers
    if isinstance(lvl, Level):
        lvl = lvl.value
    if lvl == Level.SILENT.value:
        return

    global_level = options["level"]
    if isinstance(global_level, Level):
        global_level = global_level.value
    if global_level == Level.SILENT.value or lvl > global_level:
        return

    # write newline for empty messages
    if msg is None:
        sys.stdout.write("\n")
        return

    if not isinstance(msg, str):
        # guarantee a str even if toString() hands back some other type
        msg = str(toString(msg))

    if lvl == Level.ERROR.value:
        stream, prefix = sys.stderr, "ERROR: "
    elif lvl == Level.WARN.value:
        stream, prefix = sys.stdout, "WARNING: "
    else:
        stream, prefix = sys.stdout, ""
    stream.write(prefix + msg + "\n")
# oldest supported interpreter version
ver_py_min = (3, 0, 0)

# version of the running interpreter as a tuple of integers; sys.version_info
# is used because the strings from platform.python_version_tuple() may carry
# non-numeric suffixes (e.g. "0rc1") that int() rejects
ver_py = tuple(sys.version_info[:3])

if ver_py < ver_py_min:
    # written directly to stderr: log() consults "options", which is not
    # defined yet at this point of the script
    sys.stderr.write(
        "\nERROR: incompatible Python version {}\n  requires version {} or later\n".format(
            ".".join(str(v) for v in ver_py),
            ".".join(str(v) for v in ver_py_min)))
    sys.exit(1)
# name of this script (shown in the usage text)
file_exe = os.path.basename(__file__)
# directory that contains this script
dir_tools = os.path.normpath(os.path.dirname(__file__))
# current working directory at startup
dir_root = os.getcwd()

# default values for the "scount" & "level" parameters
scount_default = 4
level_default = Level.INFO

# flag type options
flags = {
    "help": False,
    "fake": False,
    "convert-le": False,
    "notrail": False,
    "nolead": False,
}

# all available parameters ("None" denotes parameter is required)
options = {
    "dirs": None,
    "filetypes": None,
    "scount": scount_default,
    "level": level_default.value,
}

# names of the required parameters, i.e. those that have no default value;
# must be collected here, before any value gets overwritten
req_options = tuple(name for name in options if options[name] is None)

# add flags to all parameters
options.update(flags)

# alternative shorthands for parameters
short_options = {
    "h": "help",
    "d": "dirs",
    "f": "filetypes",
    "s": "scount",
    "x": "fake",
    "l": "level",
}
## Kinds of values a command line option can take.
#
# The value of each member is an (identifier, description) pair. Identifiers
# are compared against Python type names to work out how the value of an
# option has to be parsed.
class OptType(Enum):
    NONE = ("none", "")
    FLAG = ("flag", "")
    BOOL = ("bool", "")
    STRING = ("string", "")
    LIST = ("list", "")
    INT = ("int", "")

    ## Stores the identifier & description unpacked from the member value.
    def __init__(self, identifier, description):
        self.__name__ = identifier
        self.desc = description

    ## Compares with another member (identity) or with a Python type.
    #
    # Anything else is left to Python's default handling.
    def __eq__(self, other):
        if isinstance(other, OptType):
            return self is other
        if isinstance(other, type):
            return self.equals(other)
        return NotImplemented

    # a class that defines __eq__ loses its inherited hash; restore it so
    # members stay usable in sets & as dictionary keys
    __hash__ = Enum.__hash__

    ## Checks if a type or another member denotes the same kind of option.
    #
    # @param other
    #     A Python type (e.g. <code>int</code>) or an OptType member.
    # @return
    #     <code>True</code> if the name of <code>other</code> matches the
    #     identifier of this member.
    def equals(self, other):
        if other is tuple:
            # treat tuples & lists the same
            name = "list"
        elif other is str:
            # the identifier of STRING differs from the name of the str type
            name = "string"
        else:
            name = getattr(other, "__name__", None)
        return name == self.__name__

    ## Retrieves the identifier of this member.
    def getId(self):
        return self.__name__

    ## Retrieves the description of this member.
    def getDescription(self):
        return self.desc

    ## Determines the kind of value an option takes.
    #
    # @param opt
    #     Long name of the option.
    # @return
    #     NONE for unknown options, FLAG for flags, LIST for options without
    #     a default value, otherwise the member matching the type of the
    #     option's current value (NONE if there is no such member).
    @staticmethod
    def getOptionType(opt):
        if opt not in options:
            return OptType.NONE
        if opt in flags:
            return OptType.FLAG

        val = options[opt]
        # default to list
        if val is None:
            return OptType.LIST

        val_type = type(val)
        for t in OptType:
            if t.equals(val_type):
                return t
        return OptType.NONE

    ## Converts a value to boolean.
    #
    # @param val
    #     A boolean (returned as is) or a string.
    # @return
    #     <code>True</code> for "y", "yes" & "true" (case-insensitive),
    #     <code>False</code> for any other string.
    @staticmethod
    def toBoolean(val):
        if isinstance(val, bool):
            return val
        return val.lower() in ("y", "yes", "true")

    ## Converts a value to integer (int() raises ValueError if it can't).
    @staticmethod
    def toInt(val):
        return int(val)

    ## Checks if an option is listed in <code>req_options</code>.
    #
    # NOTE(review): "req_options" is not defined inside this class; it has to
    # exist at module level by the time this gets called.
    @staticmethod
    def optionIsRequired(opt):
        return opt in req_options

    ## Stores a value in the global options table.
    @staticmethod
    def setOption(opt, value):
        options[opt] = value
## Displays usage help text.
#
# The text is passed to log() at the default level.
def showUsage():
    # one entry per output line; empty entries produce the blank separators
    usage = (
        "",
        "Usage:",
        " {} -f <filetypes>[ -d <dirs>][ <flags>]".format(file_exe),
        " {} -h".format(file_exe),
        "",
        "Options:",
        " -f|--filetypes:\tComma-separated list of filename extensions to parse.",
        " -d|--dirs:\t\tComma-separated list of directories to search.",
        " -s|--scount:\t\tNumber of leading spaces to replace with tab"
            + " (default: {}).".format(scount_default),
        " -l|--level:\t\tLogging level (default: {}).".format(level_default.value),
        "",
        "Flags:",
        " -h|--help:\t\tShow usage information.",
        " -x|--fake:\t\tSimulate (don't apply changes).",
        " --convert-le:\t\tConvert CR/CRLF line endings to LF.",
        " --nolead:\t\tDon't replace leading spaces with tabs.",
        " --notrail:\t\tDon't clean trailing whitespace.",
    )
    log("\n".join(usage))
## Reports an error & terminates the process.
#
# @param code
#     Exit code handed to sys.exit().
# @param msg
#     Error message to be logged at ERROR level.
# @param[opt] usage
#     If <code>True</code> (default), the usage information is shown before
#     the process exits.
def exitWithError(code, msg, usage=True):
    # empty line to set the error apart from previous output
    log(None)
    log(msg, lvl=Level.ERROR)

    if usage:
        showUsage()

    sys.exit(code)
## Compatibility function for case matching in different Python versions.
#
# @param match
#     The statement to be compared.
# @param cases
#     Cases to be checked (mapping of statement to result).
# @param[opt] default
#     Result used when no case matches.
# @return
#     The result mapped to <code>match</code>; otherwise <code>default</code>
#     unless it is <code>None</code>, in which case <code>match</code> itself
#     is returned.
def switch(match, cases, default=None):
    if match in cases:
        return cases[match]
    return match if default is None else default
## Parses command line arguments & sets up file & directory options.
#
# Every recognized argument is stored in the global "options" table. Invalid
# input ends the process through exitWithError().
#
# @param args
#     List of args to parse.
# @param[opt] flags
#     <code>True</code> while parsing short switches that were grouped
#     together (e.g. "-xh"); only flag type options are accepted then. Note
#     that this parameter hides the module-level "flags" table inside the
#     function.
def parseArgs(args, flags=False):
    idx = 0
    while idx < len(args):
        raw_arg = args[idx]
        leading_dashes = len(raw_arg) - len(raw_arg.lstrip("-"))
        # short: a single "-" in the whole argument; long: exactly two leading
        s_arg = raw_arg.startswith("-") and raw_arg.count("-") == 1
        l_arg = leading_dashes == 2

        # all accepted arguments use a switch ("-")
        if not (s_arg or l_arg):
            exitWithError(errno.EINVAL, "malformatted argument: {}".format(raw_arg))

        cur_arg = raw_arg.lstrip("-")
        if s_arg:
            if len(cur_arg) > 1:
                # parse individual short args that are grouped together
                parseArgs(["-" + c for c in cur_arg], True)
                idx += 1
                continue
            cur_arg = switch(cur_arg, short_options)

        if cur_arg not in options:
            exitWithError(errno.EINVAL, "unknown argument: {}".format(cur_arg))

        otype = OptType.getOptionType(cur_arg)
        if otype.equals(OptType.FLAG):
            val = True
        else:
            if flags:
                exitWithError(1, "argument \"{}\" is not a flag type & cannot be grouped".format(cur_arg))
            # arguments must have a parameter: it is located at the next
            # index, & finding another switch there means it was left out
            if idx + 1 >= len(args) or args[idx + 1].startswith("-"):
                exitWithError(1, "argument \"{}\" requires a value".format(cur_arg))

            idx += 1
            val = args[idx]
            if otype.equals(OptType.BOOL):
                val = OptType.toBoolean(val)
            elif otype.equals(OptType.INT):
                try:
                    val = OptType.toInt(val)
                except ValueError:
                    exitWithError(1, "argument \"{}\" requires an integer value: {}".format(cur_arg, val))
            elif otype.equals(OptType.LIST):
                # a value without "," becomes a tuple with a single item
                val = tuple(val.split(","))

        options[cur_arg] = val
        idx += 1
if len(sys.argv) == 1:
    exitWithError(1, "missing parameters")

parseArgs(sys.argv[1:])

if options["help"]:
    showUsage()
    sys.exit(0)

# a single existing file may be given in place of directories; the
# "filetypes" parameter is not required in that case
in_paths = options["dirs"]
single_file = bool(in_paths) and len(in_paths) == 1 and os.path.isfile(in_paths[0])

# parameters that are still "None" after parsing were required but not given
for opt in options:
    if opt == "filetypes" and single_file:
        continue
    if options[opt] is None:
        exitWithError(1, "missing required argument: {}".format(opt))

# the actual work
apply_changes = not options["fake"]
if not apply_changes:
    log("\nsimulation run, changes will not be applied\n")

scount = options["scount"]
# a tab can only stand in for at least one space
if scount < 1:
    exitWithError(1, "scount must be a positive number: {}".format(scount))
# run of spaces that gets replaced with a single tab
spaces_prefix = " " * scount
## Replaces groups of spaces in the leading whitespace of a line with tabs.
#
# @param line
#     Line of text to be processed.
# @param[opt] prefix
#     Run of spaces that is replaced with one tab. If <code>None</code>
#     (default), the module-level <code>spaces_prefix</code> is used.
# @return
#     The line with every occurrence of the prefix within its leading
#     whitespace replaced with a tab; the rest of the line is untouched.
def replaceLeadingSpaces(line, prefix=None):
    if prefix is None:
        prefix = spaces_prefix
    if not prefix:
        # an empty prefix cannot be searched for; leave the line as it is
        return line

    lcontent = line.lstrip()
    # everything in front of the first non-whitespace character
    indent = line[:len(line) - len(lcontent)]
    return indent.replace(prefix, "\t") + lcontent
# number of files whose contents needed cleaning
cleaned_count = 0

## Cleans the whitespace of a single file.
#
# The file is read as UTF-8. Unless disabled through the "notrail" & "nolead"
# options, trailing whitespace is removed from every line & leading spaces
# are replaced with tabs. The line ending style found in the file is kept
# unless the "convert-le" option is set; a file that mixes styles ends up
# with a single one (CR if a lone CR occurs, else CRLF if one occurs, else
# LF). The file is rewritten only if its contents change & "apply_changes"
# is set. Read & write failures end the process through exitWithError().
#
# @param f
#     Path to the file to be cleaned.
def checkFile(f):
    # the module-level counter is rebound below, which needs the declaration
    global cleaned_count

    try:
        # newline="" disables newline translation so that the original line
        # endings are visible to the checks below
        with open(f, "r", encoding="utf-8", newline="") as buffer:
            st_orig = buffer.read()
    except UnicodeDecodeError:
        exitWithError(1,
                "could not read file {}, please check that it is a text file".format(f),
                False)
    except OSError:
        exitWithError(1, "could not open file for reading: {}".format(f), False)

    keep_le = not options["convert-le"]
    # line endings to be written to output
    le, le_name = "\n", "LF"

    # ensure we are working with LF line endings
    text = st_orig
    if "\r\n" in text:
        text = text.replace("\r\n", "\n")
        if keep_le:
            le, le_name = "\r\n", "CRLF"
    if "\r" in text:
        text = text.replace("\r", "\n")
        if keep_le:
            le, le_name = "\r", "CR"

    strip_trailing = not options["notrail"]
    fix_leading = not options["nolead"]
    contents_new = []
    for num, line_orig in enumerate(text.split("\n"), 1):
        line = line_orig
        if strip_trailing:
            # clean trailing whitespace
            line = line.rstrip()
        if fix_leading:
            # replace leading spaces with tabs
            line = replaceLeadingSpaces(line)
        if line != line_orig:
            log("cleaned line {} ({})".format(num, f), Level.DEBUG)
        contents_new.append(line)

    if le != "\n":
        log("preserving line endings \"{}\" in file: {}".format(le_name, f), Level.DEBUG)
    st_new = le.join(contents_new)

    if st_new == st_orig:
        return

    if apply_changes:
        try:
            with open(f, "w", encoding="utf-8", newline="") as buffer:
                buffer.write(st_new)
        except OSError:
            exitWithError(1, "error while opening file for writing: {}\n{}"
                    .format(f, traceback.format_exc()), False)

    # counted & reported in simulation runs as well
    cleaned_count += 1
    log("updated file: {}".format(f))
if single_file:
    checkFile(in_paths[0])
else:
    # check that all directories exist before doing anything
    for d in in_paths:
        if not os.path.isdir(d):
            exitWithError(errno.ENOENT, "file or directory not found: {}".format(d))

    for d in in_paths:
        for ROOT, DIRS, FILES in os.walk(d):
            for FILE in FILES:
                # the extension is taken from the file name alone so that dots
                # in directory names cannot be mistaken for one
                stem, dot, fsuffix = FILE.rpartition(".")
                if dot and fsuffix in options["filetypes"]:
                    f = os.path.join(ROOT, FILE)
                    checkFile(f)

log("\ncleaned {} files".format(cleaned_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment