Created
January 23, 2013 15:05
-
-
Save jongyeol/4607621 to your computer and use it in GitHub Desktop.
detecting file encoding and line-ending, and fix it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Detect each file's encoding and line-ending style, and fix them in place.
# coded by jong10
# Before running, install chardet:
#   > pip install chardet
# Usage: ./fixfileformat.py *.h *.cpp *.py
# Conversion rules, one tuple per group of file types:
#   (target line-ending, target encoding, [file extensions])
# The line-ending is 'DOS' or 'unix' -- the keys rewrite() looks up.
RULES = (
    # Example of an alternative rule: ('DOS', 'EUC-KR', ['.cpp', '.h']),
    ('unix', 'utf-8', ['.cpp', '.h']),
    ('unix', 'utf-8', ['.py']),
)
#####################################################################
import os
import sys

import chardet
def readfile(filename):
    """Return the entire contents of *filename* as raw bytes.

    The file is opened in binary mode so that line endings and encoded
    characters reach the caller exactly as stored on disk.

    Raises:
        IOError: if the file cannot be opened or read (on Python 3 this
            is an alias of OSError).
    """
    # The context manager closes the handle even if read() raises, and a
    # single read() already returns everything up to end-of-file, so no
    # accumulation loop is needed.
    with open(filename, 'rb') as f:
        return f.read()
def rewrite(filename, contents, from_ff, from_enc, to_ff, to_enc):
    """Convert *contents* to the target format and write it back if it changed.

    Args:
        filename: path of the file to overwrite.
        contents: the file's current raw bytes.
        from_ff: current line-ending style, 'DOS' or 'unix'.
        from_enc: current encoding name, or None when it is unknown (the
            encoding conversion is then skipped).
        to_ff: wanted line-ending style, 'DOS' or 'unix'.
        to_enc: wanted encoding name.

    Returns:
        1 if the file was rewritten, 0 if the converted bytes are identical
        to *contents* and nothing was written.

    Raises:
        UnicodeDecodeError: if *contents* is not valid in *from_enc*.
        LookupError: if either encoding name is unknown to Python.
        IOError: if the file cannot be written.
    """
    # Keyed by the *target* style: (byte sequence to find, replacement).
    # NOTE(review): the substitution is done on raw bytes, which assumes an
    # ASCII-compatible source encoding (e.g. UTF-8, EUC-KR); it would not be
    # correct for UTF-16/UTF-32 input -- confirm such files are out of scope.
    line_endings = {
        'DOS': (b'\n', b'\r\n'),
        'unix': (b'\r\n', b'\n'),
    }

    newcontents = contents
    if from_ff != to_ff:
        old, new = line_endings[to_ff]
        newcontents = newcontents.replace(old, new)

    # Encoding names are case-insensitive ('UTF-8' == 'utf-8'); comparing
    # them case-insensitively avoids a pointless decode/encode round trip.
    if from_enc is not None and from_enc.lower() != to_enc.lower():
        # 'ignore' drops characters the target encoding cannot represent.
        newcontents = newcontents.decode(from_enc).encode(to_enc, 'ignore')

    if newcontents == contents:
        return 0

    with open(filename, 'wb') as f:
        f.write(newcontents)
    return 1
def main(filenames):
    """Report the format of each file and fix it when a rule applies.

    For every path in *filenames* the line-ending style and encoding are
    detected, every entry of RULES whose extension list contains the file's
    extension is applied through rewrite(), and one status line is printed.
    A file that cannot be processed is reported and skipped so the rest of
    the batch still runs.
    """
    for filename in filenames:
        try:
            ext = os.path.splitext(filename)[1]
            contents = readfile(filename)
            # contents is raw bytes, so the probe must be a bytes literal.
            ff = 'DOS' if b'\r\n' in contents else 'unix'
            enc = chardet.detect(contents)['encoding']
            # rewrite() returns 1 per rule that actually changed the file.
            updated = sum(
                rewrite(filename, contents, ff, enc, lineending, encoding)
                for lineending, encoding, extensions in RULES
                if ext in extensions
            )
            if updated > 0:
                print('%s: %s, %s => FIXED' % (filename, ff, enc))
            else:
                print('%s: %s, %s' % (filename, ff, enc))
        except IOError:
            print('%s: cannot open' % (filename, ))
        except (UnicodeError, LookupError):
            # The detected encoding was wrong or is unknown to Python;
            # leave this file alone rather than abort the whole run.
            print('%s: cannot convert' % (filename, ))


if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment