Last active
August 29, 2015 14:14
-
-
Save asbjornu/a73c8ab7a2f733375d06 to your computer and use it in GitHub Desktop.
Enforce UTF-8 Without BOM In All C# Files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# -*- coding: utf-8 -*- | |
import os | |
import glob | |
import fnmatch | |
import codecs | |
from chardet.universaldetector import UniversalDetector | |
# from http://farmdev.com/talks/unicode/
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Return *obj* as text, decoding it first if it is a byte string.

    Args:
        obj: Any object. Byte strings (``str`` on Python 2, ``bytes`` on
            Python 3) are decoded; everything else, including text that is
            already unicode and non-string objects, is returned unchanged.
        encoding: Codec used to decode byte strings. Defaults to ``'utf-8'``.

    Returns:
        The decoded text for byte-string input, otherwise *obj* itself.

    Raises:
        UnicodeDecodeError: If a byte string is not valid in *encoding*.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this single check
    # matches exactly the "basestring but not unicode" case there, and it
    # also works on Python 3 where ``basestring``/``unicode`` do not exist.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
def enforce_unicode():
    """Re-encode every ``*.cs`` file below the current directory as UTF-8.

    Walks ``'.'`` recursively. For each ``*.cs`` file the encoding is guessed
    with chardet's ``UniversalDetector``; files whose detected encoding is
    neither UTF-8 nor ASCII are decoded with that encoding and rewritten in
    place as UTF-8 without a byte order mark. One status line is printed per
    converted file.

    Files for which no encoding could be detected are left untouched.

    Raises:
        UnicodeDecodeError: If a file cannot be decoded with the encoding
            that was detected for it.
        LookupError: If the detected encoding name is unknown to Python.
    """
    detector = UniversalDetector()
    for root, _dirnames, filenames in os.walk('.'):
        for filename in fnmatch.filter(filenames, '*.cs'):
            filepath = os.path.join(root, filename)

            detector.reset()
            # Binary mode: the detector needs the raw bytes, and text mode
            # may translate line endings on some platforms.
            with open(filepath, 'rb') as f:
                for line in f:
                    detector.feed(line)
                    if detector.done:
                        break
            detector.close()
            encoding = detector.result['encoding']

            # Nothing to do when detection failed or the bytes are already
            # valid UTF-8. The comparison is case-insensitive because the
            # reported spelling ('utf-8' vs 'UTF-8') is not guaranteed, and
            # pure ASCII is byte-for-byte identical to its UTF-8 encoding.
            # A BOM-prefixed file is expected to be reported under a
            # different name (e.g. 'UTF-8-SIG') and is therefore converted.
            if not encoding or encoding.lower() in ('utf-8', 'ascii'):
                continue

            print('%s -> UTF-8 %s' % (encoding.ljust(12), filepath.ljust(80)))

            # codecs.open already yields unicode text, so no further
            # conversion is needed before re-encoding.
            with codecs.open(filepath, 'r', encoding=encoding) as f:
                content = f.read()

            # Some codecs (e.g. 'utf-16-le') keep the BOM as a leading
            # U+FEFF character; drop it so the output has no BOM.
            if content.startswith(u'\ufeff'):
                content = content[1:]

            # Binary mode so the encoded bytes are written verbatim.
            with open(filepath, 'wb') as f:
                f.write(content.encode('utf-8'))
# Script entry point: runs the conversion as soon as the file is executed,
# operating on the current working directory.
enforce_unicode()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment