Skip to content

Instantly share code, notes, and snippets.

@nevill
Last active December 6, 2017 09:39
Show Gist options
  • Save nevill/6a59ad277342bea2f8108cf55a35ba3e to your computer and use it in GitHub Desktop.
Save nevill/6a59ad277342bea2f8108cf55a35ba3e to your computer and use it in GitHub Desktop.
Git hooks that check whether a file contains a BOM (byte order mark); compatible with Python 2 and 3.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import os.path
import sys
import subprocess
# Byte sequences that has_bom() looks for at the very start of a file.
# UTF-8 byte order mark (the three bytes EF BB BF).
UTF8_BOM = b'\xef\xbb\xbf'
# UTF-16 byte order mark in little-endian byte order (FF FE).
# NOTE(review): the big-endian form (FE FF) is not covered by this constant.
UTF16_BOM = b'\xff\xfe'
def _show_working_dir():
    """Run ``git rev-parse --show-toplevel`` and return its raw output.

    The value is handed back exactly as ``subprocess.check_output``
    produced it; decoding and whitespace stripping are left to the caller.
    """
    command = ['git', 'rev-parse', '--show-toplevel']
    return subprocess.check_output(command)
def _diff_index():
    """Return the raw ``git diff-index`` listing of staged added/modified files.

    The index is compared against ``HEAD`` when it exists.  In a repository
    that has no commit yet ``HEAD`` cannot be resolved, so the comparison is
    made against the empty tree instead; every staged file then shows up as
    an addition rather than git aborting with an error.

    Returns:
        The undecoded command output, one path per line.

    Raises:
        subprocess.CalledProcessError: if a git command exits with an error.
    """
    # os.devnull is opened by hand because subprocess.DEVNULL does not exist
    # on Python 2.
    with open(os.devnull, 'r+b') as devnull:
        head_exists = subprocess.call(
            ['git', 'rev-parse', '--verify', '--quiet', 'HEAD'],
            stdout=devnull,
            stderr=devnull
        ) == 0
        if head_exists:
            against = 'HEAD'
        else:
            # Hashing empty input as a tree yields the id of the empty tree
            # without hard-coding a value tied to one hash algorithm.
            against = subprocess.check_output(
                ['git', 'hash-object', '-t', 'tree', '--stdin'],
                stdin=devnull
            ).decode().strip()
    return subprocess.check_output(
        [
            'git',
            'diff-index',
            '--cached',
            '--diff-filter=AM',
            '--name-only',
            against,
        ]
    )
def files_staged_for_commit():
    """Return the paths reported by ``_diff_index()`` as a list of strings.

    The command output is decoded and split on newlines.  The last piece,
    which is empty when the output ends with a newline, is discarded.
    """
    names = _diff_index().decode().split('\n')
    # Drop only the final element; an empty listing becomes an empty list.
    del names[-1:]
    return names
def has_bom(bs):
    """Return True when *bs* starts with ``UTF8_BOM`` or ``UTF16_BOM``."""
    # startswith() accepts a tuple and matches if any member is a prefix.
    return bs.startswith((UTF8_BOM, UTF16_BOM))
# Staged paths are joined onto the directory printed by
# ``git rev-parse --show-toplevel`` before being opened.
prefix = _show_working_dir().decode().strip()
for staged_name in files_staged_for_commit():
    full_path = os.path.join(prefix, staged_name)
    # Three bytes are enough to hold the longest marker has_bom() checks.
    with open(full_path, 'rb') as handle:
        leading_bytes = handle.read(3)
    if has_bom(leading_bytes):
        print('Found BOM in', handle.name)
        # A non-zero exit status stops at the first offending file.
        sys.exit(1)
#!/usr/bin/env python
from __future__ import print_function
import fileinput
import sys
import subprocess
# Byte sequences that has_bom() looks for at the very start of a blob.
# UTF-8 byte order mark (the three bytes EF BB BF).
UTF8_BOM = b'\xef\xbb\xbf'
# UTF-16 byte order mark in little-endian byte order (FF FE).
# NOTE(review): the big-endian form (FE FF) is not covered by this constant.
UTF16_BOM = b'\xff\xfe'
# Forty zeros; rev_list() compares the old revision against this value.
# NOTE(review): presumably the all-zero object id git passes to hooks for a
# ref that does not exist yet -- confirm against the githooks documentation.
ZERO_COMMIT = '0000000000000000000000000000000000000000'
def diff_tree(rev):
    """Return the raw ``git diff-tree`` entries for files added or modified in *rev*.

    Each line of the result has the form
    ``:<old-mode> <new-mode> <old-object> <new-object> <status> <file>``.
    The TAB that git prints before the file name is turned into a space so
    that callers can split every line on one separator.

    Args:
        rev: the commit to inspect.

    Returns:
        The decoded, stripped listing; an empty string when nothing matches.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    result = subprocess.check_output(
        [
            'git',
            'diff-tree',
            # Recurse into sub-trees.  Without -r a change inside a directory
            # is reported as a tree object, which cannot be read as a blob.
            '-r',
            # Show a root commit as the creation of all its files; by default
            # diff-tree prints nothing for a commit that has no parent.
            '--root',
            '--pretty=format:',
            '--diff-filter=AM',
            rev
        ]
    )
    return result.decode().strip().replace('\t', ' ')
def cat_file(object_id):
    """Return the output of ``git cat-file blob <object_id>``, undecoded."""
    command = ['git', 'cat-file', 'blob', object_id]
    return subprocess.check_output(command)
def rev_list(prev, current):
    """List the commits a ref update introduces, one id per line.

    Args:
        prev: the ref's old value, or ``ZERO_COMMIT`` when the ref is created.
        current: the ref's new value, or ``ZERO_COMMIT`` when it is deleted.

    Returns:
        The decoded ``git rev-list`` output; an empty string for a deletion.

    Raises:
        subprocess.CalledProcessError: if git exits with an error.
    """
    if current == ZERO_COMMIT:
        # The ref is being removed: there is no new commit to walk, and
        # handing the all-zero id to git would only make it fail.
        return ''
    if prev == ZERO_COMMIT:
        revisions = current
    else:
        revisions = '%s..%s' % (prev, current)
    # --not negates the revisions that FOLLOW it, so it has to come before
    # --all.  In that order the walk excludes everything already reachable
    # from an existing ref and yields only commits that are new.
    command = ['git', 'rev-list', revisions, '--not', '--all']
    return subprocess.check_output(command).decode()
def has_bom(bs):
    """Tell whether *bs* begins with one of the BOM constants.

    Returns True if *bs* starts with ``UTF8_BOM`` or with ``UTF16_BOM``,
    otherwise False.
    """
    return bs.startswith(UTF8_BOM) or bs.startswith(UTF16_BOM)
def process(id):
    """Report whether the blob named by *id* begins with a BOM."""
    # Only the first three bytes of the blob are handed to has_bom().
    leading_bytes = cat_file(id)[:3]
    return has_bom(leading_bytes)
if __name__ == '__main__':
    # The hook input carries one line per updated ref:
    #   <old-value> SP <new-value> SP <ref-name> LF
    # Every line is examined so that each pushed ref is checked, not just
    # the first one.
    for ref_line in fileinput.input():
        fields = ref_line.split()
        if len(fields) != 3:
            # Blank or malformed line: nothing to check.
            continue
        old_rev, new_rev, ref_name = fields
        for rev in rev_list(old_rev, new_rev).splitlines():
            for line in diff_tree(rev).splitlines():
                # :<old-mode> SP <new-mode> SP <old-object> SP <new-object>
                #   SP <status> SP <file>
                # Split at most five times so that a file name containing
                # spaces stays in one piece.
                mode, t, prev_object_id, object_id, status, name = \
                    line.split(' ', 5)
                if t == '160000':
                    # Gitlink (submodule) entry: the object is a commit in
                    # another repository, not a blob that can be read here.
                    continue
                if process(object_id):
                    print('Error: found BOM in', name)
                    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment