Skip to content

Instantly share code, notes, and snippets.

@JakeWharton
Created May 19, 2011 19:10
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save JakeWharton/981486 to your computer and use it in GitHub Desktop.
Save JakeWharton/981486 to your computer and use it in GitHub Desktop.
Python script to fix common CRLF and Unicode problems when working with Visual Studio and git.
#!/usr/bin/env python
import sys
# Abort as soon as the module is loaded on interpreters older than
# Python 2.6 instead of failing later with a less obvious error.
if sys.version_info < (2, 6):
    raise RuntimeError("Python 2.6+ is required.")
import codecs
import logging
import optparse
import os
import unittest
# Byte patterns for a CR LF pair in little-endian UTF-16 text.  A tool that
# rewrites every bare LF byte as CR LF without understanding the encoding
# turns the four bytes "\r\0\n\0" into the five bytes of CRLF_OFFSET_ERROR,
# which shifts every following code unit by one byte.  Both patterns are
# bytes literals so they can be matched against raw file content on
# Python 2.6+ as well as Python 3.
CRLF_OFFSET_ERROR = b'\r\0\r\n\0'
CRLF_OFFSET_FIX = b'\r\0\n\0'


def fsckByteString(content=None, log=None):
    """Normalise the raw bytes of a text file to BOM-less UTF-8.

    Three cases are handled, checked in this order:

    * Content starting with ``codecs.BOM_UTF16`` (the BOM for the host's
      native byte order) has any CRLF_OFFSET_ERROR sequences repaired and
      is then transcoded from UTF-16 to UTF-8.
    * Content starting with the UTF-8 BOM is returned with the BOM removed.
    * Anything else is returned unchanged.

    Args:
        content: Raw bytes read from the file.
        log: Optional logger-like object providing ``info``, ``warning``
            and ``error``.  When omitted (or falsy) nothing is logged.

    Returns:
        The corrected content as bytes.

    Raises:
        ValueError: If ``content`` is ``None`` or empty.
    """
    if not content:
        # The original raised the undefined name ``ArgumentException``,
        # which surfaced as a NameError; ValueError states the real problem.
        raise ValueError('Content must not be empty.')

    if content.startswith(codecs.BOM_UTF16):
        if log:
            log.info('Detected UTF-16 BOM.')
        if CRLF_OFFSET_ERROR in content:
            if log:
                log.error('Byte shift due to improper line ending conversion!')
                log.info('Correcting line endings...')
            # Must happen before decoding: the stray byte misaligns every
            # 16-bit code unit that follows it.
            content = content.replace(CRLF_OFFSET_ERROR, CRLF_OFFSET_FIX)
        if log:
            log.info('Converting to UTF-8...')
        # The "utf16" codec consumes the BOM, so the result carries none.
        return content.decode("utf16").encode("utf8")

    if content.startswith(codecs.BOM_UTF8):
        if log:
            # ``warning`` rather than the deprecated ``warn`` alias.
            log.warning('Detected unneccessary UTF-8 BOM.')
            log.info('Removing BOM...')
        return content[len(codecs.BOM_UTF8):]

    if log:
        log.info('No action required.')
    return content
class fscker(unittest.TestCase):
    """Unit tests for fsckByteString, one per input encoding case."""

    # DATA is an explicit unicode literal and the byte-level arguments below
    # are bytes literals, so the suite behaves the same on Python 2.6+ and
    # on Python 3.
    DATA = u"simple\r\ntest\r\nof\r\nencodings"
    # Every test expects plain UTF-8 bytes with no BOM.
    EXPECTED = DATA.encode("utf8")

    def test_valid_utf8(self):
        """UTF-8 input without a BOM comes back unchanged."""
        value = self.DATA.encode("utf8")
        actual = fsckByteString(value)
        self.assertEqual(self.EXPECTED, actual)

    def test_valid_utf8_with_bom(self):
        """A leading UTF-8 BOM is removed."""
        value = codecs.BOM_UTF8 + self.DATA.encode("utf8")
        actual = fsckByteString(value)
        self.assertEqual(self.EXPECTED, actual)

    def test_valid_utf16_to_utf8(self):
        """Well-formed UTF-16 input is transcoded to UTF-8."""
        value = self.DATA.encode("utf16")
        actual = fsckByteString(value)
        self.assertEqual(self.EXPECTED, actual)

    def test_invalid_utf16_to_utf8(self):
        """UTF-16 input damaged by a byte-level LF -> CRLF rewrite is repaired."""
        # Simulate the damage: prefix every LF byte with a CR byte.
        value = self.DATA.encode("utf16").replace(b'\n', b'\r\n')
        actual = fsckByteString(value)
        self.assertEqual(self.EXPECTED, actual)
if __name__ == '__main__':
    # Command-line entry point: run the self-tests with --test, otherwise
    # check (and fix in place) every file named on the command line.
    parser = optparse.OptionParser(usage='Usage: %prog [options] file1 [... fileN]')
    parser.add_option('--test', dest='is_testing', action='store_true',
                      default=False, help='run test suite')
    options, files = parser.parse_args()

    logging.basicConfig(format='%(name)s %(levelname)s: %(message)s', level=logging.INFO)
    # print('') emits a blank line on both Python 2 and Python 3; a bare
    # ``print`` is a no-op expression on Python 3.
    print('')

    if options.is_testing:
        unittest.TextTestRunner(verbosity=2).run(unittest.TestSuite(
            unittest.defaultTestLoader.loadTestsFromTestCase(fscker)
        ))
    elif files:
        for fname in files:
            # One logger per file so each message is prefixed with its name.
            log = logging.getLogger(os.path.basename(fname))
            try:
                with open(fname, 'rb') as f:
                    original = f.read()
                fixed = fsckByteString(original, log)
                # Rewrite only when something changed, so files that need no
                # action are never truncated and reopened for writing.
                if fixed != original:
                    with open(fname, 'wb') as f:
                        f.write(fixed)
            except Exception as e:
                # Top-level boundary: report the failure and continue with
                # the remaining files.
                log.error('"%s" could not be checked.', fname)
                log.error(e)
            # NOTE(review): the source's indentation was lost; this blank
            # line is assumed to separate the output of successive files.
            print('')
    else:
        parser.print_help()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment