Skip to content

Instantly share code, notes, and snippets.

@rca
Created Feb 13, 2018
Embed
What would you like to do?
get rid of whitespace in your uncommitted work
#!/usr/bin/env python2.7
"""
Ensure new lines added to git do not have trailing whitespace
"""
import os
import sys
import time
import traceback
from optparse import OptionParser
from shutil import copy
from subprocess import PIPE, Popen
# git command lines, kept as tuples so they can be handed straight to Popen.
GIT_STATUS = ('git', 'status', '-s', '-uno')
GIT_DIFF = ('git', 'diff')
GIT_DIFF_CACHED = GIT_DIFF + ('--cached',)

# Template for one "@@" hunk; every field starts out as None.
PATCH_INFO = dict.fromkeys((
    'old_start_line',
    'old_line_count',
    'new_start_line',
    'new_line_count',
    'description',
    'lines',
))

# Template for a single line inside a hunk; every field starts out as None.
PATCH_LINE = dict.fromkeys(('type', 'line', 'line_number'))

# First character of a hunk line -> kind of line.
PATCH_LINE_TYPE = {
    ' ': 'context',
    '+': 'add',
    '-': 'remove',
    '\\': 'context',
}

# First character of a hunk line -> how far the running line number
# advances after that line has been recorded.
PATCH_LINE_TYPE_INCREMENT = {
    ' ': 1,
    '+': 1,
    '-': 0,
    '\\': 0,
}
def get_new_patch_info():
    """Return a copy of PATCH_INFO whose 'lines' entry is a new empty list."""
    return dict(PATCH_INFO, lines=[])
def get_new_patch_line():
    """Return a shallow copy of the PATCH_LINE template."""
    return dict(PATCH_LINE)
class Diff(object):
    """
    Super basic diff parser

    Parses the text of a unified diff for a single file into hunks and can
    replay those hunks onto a buffer.

    Attributes:
        diff: the raw diff text handed to the constructor.
        diff_info: lines seen before the ``--- `` header.
        patches: one dict per ``@@`` hunk, built by get_new_patch_info().
        old_path: text following ``--- `` in the header.
        new_path: text following ``+++ `` in the header.
        num_path_remove: number of leading path components dropped from
            new_path when apply() opens the file itself.
    """

    def __init__(self, diff):
        """Store *diff* and parse it immediately."""
        self.diff = diff
        self.diff_info = []
        self.patches = []
        self.old_path = None
        self.new_path = None
        self.num_path_remove = 1
        self.process_diff()

    def add_line(self, lines, patch_line):
        """Insert the patch line's text at its (1-based) line number."""
        line_number = patch_line['line_number'] - 1
        lines.insert(line_number, patch_line['line'])

    def apply(self, buf=None):
        """
        Replay every parsed hunk onto *buf* and return the resulting text.

        When *buf* is None the content is read from the file named by
        new_path with its first num_path_remove components stripped.  The
        result ends with a newline only if *buf* did.

        Raises AssertionError (from context_line) when a context line does
        not match the buffer.
        """
        if buf is None:
            path = '/'.join(self.new_path.split('/')[self.num_path_remove:])
            with open(path, 'rb') as f:
                buf = f.read()
        buf_lines = buf.splitlines()
        for patch in self.patches:
            for line in patch['lines']:
                # text of the "\ No newline at end of file" marker line
                if line['line'] == ' No newline at end of file':
                    continue
                if line['type'] == 'remove':
                    self.remove_line(buf_lines, line)
                elif line['type'] == 'add':
                    self.add_line(buf_lines, line)
                elif line['type'] == 'context':
                    self.context_line(buf_lines, line)
        if buf.endswith('\n'):
            end = '\n'
        else:
            end = ''
        return '\n'.join(buf_lines) + end

    def context_line(self, lines, patch_line):
        """assert the lines are the same"""
        line_number = patch_line['line_number'] - 1
        assert lines[line_number] == patch_line['line'], \
            'line: %s, "%s" != "%s"' % (
                line_number + 1, lines[line_number], patch_line['line'])

    @staticmethod
    def _split_range(token):
        """
        Split a hunk range token such as ``-12,3`` into (start, count).

        Both values are returned as strings.  The unified format leaves the
        count out when it is 1, so a missing count is reported as '1'.
        """
        start, _, count = token[1:].partition(',')
        return start, count or '1'

    def process_diff(self):
        """
        Parse the given diff string

        Fills in old_path, new_path, diff_info and patches.  Raises
        Exception for an unexpected line between the ``--- `` header and
        the first hunk, and KeyError for a hunk line with an unknown
        leading marker.
        """
        curr_patch = None
        found_diff_start = False
        line_number = None
        for line in self.diff.splitlines():
            if line.startswith('@@'):
                if curr_patch is not None:
                    self.patches.append(curr_patch)
                curr_patch = get_new_patch_info()
                # maxsplit keeps a description that itself contains '@@'
                line_split = line.split('@@', 2)
                curr_patch['description'] = line_split[2].strip()
                line_info = line_split[1].strip().split()
                (curr_patch['old_start_line'],
                 curr_patch['old_line_count']) = self._split_range(line_info[0])
                (curr_patch['new_start_line'],
                 curr_patch['new_line_count']) = self._split_range(line_info[1])
                line_number = int(curr_patch['new_start_line'])
            elif curr_patch is not None:
                # Inside a hunk every line is content.  This is tested
                # before the header prefixes because a removed line whose
                # text starts with "-- " shows up as "--- ..." (and an added
                # "++ " line as "+++ ...").
                # A completely empty line is read as blank context.
                marker = line[:1] or ' '
                patch_line = get_new_patch_line()
                patch_line['line_number'] = line_number
                line_number += PATCH_LINE_TYPE_INCREMENT[marker]
                patch_line['type'] = PATCH_LINE_TYPE[marker]
                patch_line['line'] = line[1:]
                curr_patch['lines'].append(patch_line)
            elif line.startswith('--- '):
                found_diff_start = True
                self.old_path = line[4:]
            elif line.startswith('+++ '):
                self.new_path = line[4:]
            elif not found_diff_start:
                self.diff_info.append(line)
            else:
                raise Exception('Unknown line: "%s"' % (line,))
        # add the last patch to the list
        if curr_patch is not None:
            self.patches.append(curr_patch)

    def remove_line(self, lines, patch_line):
        """Delete the entry of *lines* at the patch line's line number."""
        line_number = patch_line['line_number'] - 1
        del lines[line_number]
class GitDiff(Diff):
    """
    Removes trailing whitespace from added lines
    """

    def add_line(self, lines, patch_line):
        """Check the added line is already present, then rstrip it in place"""
        index = patch_line['line_number'] - 1
        current = lines[index]
        assert current == patch_line['line']
        lines[index] = current.rstrip()

    def remove_line(self, lines, patch_line):
        """Intentionally a no-op: only whitespace is stripped, nothing is deleted"""
def backup(filename):
    """Copy *filename* to '<filename>.<YYYYmmdd-HHMMSS>' using the current local time."""
    stamp = time.strftime('%Y%m%d-%H%M%S')
    copy(filename, '%s.%s' % (filename, stamp))
def run_command(command):
    """Run *command* (an argument sequence) and return what it wrote to stdout."""
    stdout, _ = Popen(command, stdout=PIPE).communicate()
    return stdout
def process_diff(filename, cached, backup_original=True):
    """
    Strip trailing whitespace from the lines git reports as added in *filename*.

    Args:
        filename: path handed to ``git diff`` and rewritten in place.
        cached: when true, diff the index (``git diff --cached``) instead of
            the working tree.
        backup_original: when true, copy the file aside with backup()
            before it is overwritten.

    Raises:
        AssertionError: propagated from GitDiff.apply() when the file
            content does not match the diff.
    """
    base = GIT_DIFF_CACHED if cached else GIT_DIFF
    # '--' ends option/revision parsing, so the name is always read as a path.
    command = base + ('--', filename)
    diff = GitDiff(run_command(command))
    # No hunks (empty or binary diff) or a deletion: nothing to strip, and
    # there may be no file to open.
    if not diff.patches or diff.new_path == '/dev/null':
        return
    # Build the new content first so a mismatch leaves no stray backup behind.
    buf = diff.apply()
    if backup_original:
        backup(filename)
    with open(filename, 'wb') as f:
        f.write(buf)
def run(args=None):
    """
    Command-line entry point.

    Parses options (``--no-backup``), changes into the directory that holds
    ``.git`` (unless the GIT_DIR environment variable is set), then calls
    process_diff() for every non-blank status column of every line printed
    by ``git status -s -uno``.

    Args:
        args: argument list to parse; sys.argv[1:] is used when empty or None.

    Exits via sys.exit() when no ``.git`` directory is found.  An
    AssertionError raised while processing a file is reported and the run
    continues with the next entry.
    """
    parser = OptionParser()
    parser.add_option('--no-backup', action='store_false', dest='backup',
                      default=True,
                      help='Do not create a backup file of the original')
    options, args = parser.parse_args(args or sys.argv[1:])

    # find the .git directory
    curr_dir = os.getcwd()
    last_dir = None
    if not os.environ.get('GIT_DIR', None):
        found = False
        # dirname() of the filesystem root is the root itself, which ends
        # the walk
        while curr_dir != last_dir:
            git_dir = os.path.join(curr_dir, '.git')
            if os.path.exists(git_dir):
                found = True
                break
            last_dir = curr_dir
            curr_dir = os.path.dirname(curr_dir)
        if not found:
            sys.exit('git directory not found; cannot continue')
    os.chdir(curr_dir)

    for line in run_command(GIT_STATUS).splitlines():
        filename = line[3:]
        backed_up = False
        # Column 0 is the staged status, column 1 the unstaged status.
        for pos, status in enumerate(line[:2]):
            if status == ' ':
                continue
            cached = (pos == 0)
            # Back the file up once, before the first pass touches it.  A
            # second backup would get the same one-second timestamp and
            # overwrite the pristine copy with already-modified content.
            if options.backup and not backed_up and os.path.isfile(filename):
                backup(filename)
                backed_up = True
            try:
                process_diff(filename, cached, backup_original=False)
            except AssertionError as msg:
                traceback.print_exc()
                print("Error processing diff on %s (%s)" % (filename, msg))


if __name__ == '__main__':
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment