Skip to content

Instantly share code, notes, and snippets.

@jongyeol
Created January 23, 2013 15:05
Show Gist options
  • Save jongyeol/4607621 to your computer and use it in GitHub Desktop.
Save jongyeol/4607621 to your computer and use it in GitHub Desktop.
detecting file encoding and line-ending, and fix it
#!/usr/bin/env python
# Detect each file's encoding and line-ending style, and fix them in place
# according to RULES below.
# coded by jong10
# Prerequisite: install chardet first:
# > pip install chardet
# Usage: ./fixfileformat.py *.h *.cpp *.py
# Conversion targets. Each rule is a tuple of
#     (line-ending, encoding, extensions)
# and applies to every file whose extension is listed in `extensions`.
# Example of an alternative rule, kept for reference:
#     ('DOS', 'EUC-KR', ['.cpp', '.h'])
RULES = (
    ('unix', 'utf-8', ['.cpp', '.h']),
    ('unix', 'utf-8', ['.py']),
)
#####################################################################
import os
import sys
import chardet
def readfile(filename):
    """Return the entire contents of *filename* as raw bytes.

    The file is opened in binary mode so that neither its encoding nor its
    line endings are altered while reading.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # One read() call already returns everything up to EOF, so no loop is
    # needed; the context manager closes the handle even if the read fails.
    with open(filename, 'rb') as f:
        return f.read()
def rewrite(filename, contents, from_ff, from_enc, to_ff, to_enc):
    """Convert *contents* to the wanted format and write it back if changed.

    Args:
        filename: path of the file to overwrite.
        contents: the file's current contents as raw bytes.
        from_ff: current line-ending style, 'DOS' or 'unix'.
        from_enc: current encoding name, or None when it is unknown (in
            which case no re-encoding is attempted).
        to_ff: wanted line-ending style, 'DOS' or 'unix'.
        to_enc: wanted encoding name.

    Returns:
        1 if the file was rewritten, 0 if the converted contents are
        identical to *contents* and the file was left alone.

    Raises:
        KeyError: if the line endings must change and *to_ff* is unknown.
        UnicodeError, LookupError: if *contents* cannot be decoded with
            *from_enc*, or an encoding name is not recognised.
        IOError: if the file cannot be written.
    """
    line_endings = {'DOS': b'\r\n', 'unix': b'\n'}
    newcontents = contents
    if from_ff != to_ff:
        target = line_endings[to_ff]
        # Collapse CRLF to LF first so that converting to DOS can never turn
        # an existing CRLF into '\r\r\n'.
        # NOTE: this works on the raw bytes before decoding, so it assumes an
        # ASCII-compatible source encoding.
        newcontents = newcontents.replace(b'\r\n', b'\n')
        if target != b'\n':
            newcontents = newcontents.replace(b'\n', target)
    if from_enc is not None and from_enc != to_enc:
        # 'ignore' drops characters the target encoding cannot represent
        # rather than failing the whole conversion.
        newcontents = newcontents.decode(from_enc).encode(to_enc, 'ignore')
    if newcontents != contents:
        with open(filename, 'wb') as f:
            f.write(newcontents)
        return 1
    return 0
# Script entry point: for every file named on the command line, detect its
# line-ending style and encoding, apply each rule in RULES whose extension
# list contains the file's extension, and print one status line per file.
if __name__ == '__main__':
    for f in sys.argv[1:]:
        try:
            ext = os.path.splitext(f)[1]
            contents = readfile(f)
            # Any CRLF pair marks the file as DOS-style; the contents are
            # raw bytes, so the needle must be a bytes literal.
            ff = 'DOS' if b'\r\n' in contents else 'unix'
            enc = chardet.detect(contents)['encoding']
            # rewrite() returns 1 when it changed the file, so the sum is
            # the number of matching rules that actually rewrote it.
            updated = sum(rewrite(f, contents, ff, enc, lineending, encoding)
                          for lineending, encoding, extensions in RULES
                          if ext in extensions)
            if updated > 0:
                print('%s: %s, %s => FIXED' % (f, ff, enc))
            else:
                print('%s: %s, %s' % (f, ff, enc))
        except IOError:
            print('%s: cannot open' % (f, ))
        except (UnicodeError, LookupError):
            # A wrong encoding guess or an unknown codec name must not abort
            # the remaining files; report it and move on.
            print('%s: cannot convert' % (f, ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment