Skip to content

Instantly share code, notes, and snippets.

@asbjornu
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save asbjornu/a73c8ab7a2f733375d06 to your computer and use it in GitHub Desktop.
Save asbjornu/a73c8ab7a2f733375d06 to your computer and use it in GitHub Desktop.
Enforce UTF-8 Without BOM In All C# Files
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import os
import glob
import fnmatch
import codecs
from chardet.universaldetector import UniversalDetector
# from http://farmdev.com/talks/unicode/
def to_unicode_or_bust(obj, encoding='utf-8'):
    """Return *obj* as unicode text if it is a byte string, else unchanged.

    Args:
        obj: Any object. Only byte strings are converted; text that is
            already unicode and non-string objects are returned as-is.
        encoding: Codec used to decode a byte string (default ``'utf-8'``).

    Returns:
        The decoded unicode text for byte-string input, otherwise *obj*.

    Raises:
        UnicodeDecodeError: If the bytes are not valid in *encoding*.
    """
    # `bytes` is an alias of `str` on Python 2, so this single check selects
    # the same objects as "basestring but not unicode" there, while also
    # working on Python 3 where `basestring`/`unicode` no longer exist.
    if isinstance(obj, bytes):
        obj = obj.decode(encoding)
    return obj
def _detect_encoding(detector, filepath):
    """Return the detector's best-guess encoding name for *filepath*, or None."""
    detector.reset()
    # Binary mode: the detector must see the raw bytes, free of any newline
    # translation or implicit decoding done by text mode.
    with open(filepath, 'rb') as f:
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']


def enforce_unicode():
    """Re-encode every ``*.cs`` file under the current directory as UTF-8.

    Walks ``'.'`` recursively, detects each file's encoding with chardet and
    rewrites in place, as UTF-8 without a byte order mark, every file whose
    detected encoding is neither UTF-8 nor ASCII. Files whose encoding cannot
    be detected, or that fail to decode with the detected encoding, are left
    untouched. One status line is printed per converted or skipped file.
    """
    detector = UniversalDetector()
    for root, _dirnames, filenames in os.walk('.'):
        for filename in fnmatch.filter(filenames, '*.cs'):
            filepath = os.path.join(root, filename)
            encoding = _detect_encoding(detector, filepath)
            # Compare case-insensitively: the reported name may be spelled in
            # either case. ASCII bytes are already valid UTF-8, so rewriting
            # such files would only touch them for nothing.
            if not encoding or encoding.lower() in ('utf-8', 'ascii'):
                continue
            try:
                with codecs.open(filepath, 'r', encoding=encoding) as f:
                    content = f.read()
            except (UnicodeError, LookupError) as error:
                # A wrong guess or an unknown codec name must not abort the
                # whole walk and leave the tree half-converted.
                print('%s -> skipped %s (%s)'
                      % (encoding.ljust(12), filepath, error))
                continue
            # Some codecs (e.g. the explicit-endianness UTF-16 variants) keep
            # the BOM as a leading U+FEFF; drop it so no BOM is written back.
            if content.startswith(u'\ufeff'):
                content = content[1:]
            print('%s -> UTF-8 %s' % (encoding.ljust(12), filepath))
            # Binary mode: we write already-encoded bytes, and text mode
            # would expand the file's existing CRLF line endings on Windows.
            with open(filepath, 'wb') as f:
                f.write(content.encode('utf-8'))
# Script entry point: convert the *.cs files below the current working
# directory. There is no __main__ guard, so this also runs on import.
enforce_unicode()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment