Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env python
import codecs
import os
from os.path import join
import re
import sys
# Matches the file names (not full paths) that should be re-encoded.
SOURCE_FILE_RE = re.compile(
    r'.+\.(a4d|a4p|a4l|ini|inc|htm|html|shtml|css|js|php)$')


def convert_file(path, encoding):
    """Rewrite the file at *path* in place, transcoding it to UTF-8.

    The whole file is read as bytes, decoded using *encoding*, and written
    back encoded as UTF-8.  The content is decoded unconditionally, so the
    caller must make sure the file really is in *encoding*.

    Raises:
        UnicodeDecodeError: if the bytes are not valid in *encoding*.  The
            file is left untouched in that case.
    """
    with open(path, 'rb') as src:
        raw = src.read()
    # Decode before reopening for writing: opening with 'wb' truncates the
    # file, so a decode failure after that point would lose its content.
    text = raw.decode(encoding)
    with open(path, 'wb') as dst:
        dst.write(text.encode('utf-8'))


def iter_source_files(src_dir):
    """Yield the path of every matching file below *src_dir*.

    A file matches when its name satisfies SOURCE_FILE_RE.  Directories
    named '.git' are not descended into.
    """
    for root, dirs, files in os.walk(src_dir):
        # Removing the entry in place prunes it from the top-down walk.
        if '.git' in dirs:
            dirs.remove('.git')
        for name in files:
            if SOURCE_FILE_RE.search(name):
                yield join(root, name)


def convert_tree(src_dir, encoding):
    """Transcode every matching file below *src_dir* to UTF-8.

    Prints one "Processing <path>" line per file before converting it.
    A failure in convert_file propagates and stops the run; files handled
    before the failure stay converted.
    """
    for path in iter_source_files(src_dir):
        print("Processing %s" % path)
        convert_file(path, encoding)


def main(argv):
    """Run the converter for the command line *argv*.

    *argv* has the shape of sys.argv: program name, directory, charset.

    Returns:
        0 on success, 1 when the arguments are missing or invalid, so that
        a calling shell can tell a rejected invocation from a real run.
    """
    if len(argv) < 3:
        print("usage: convert_to_utf8 <path> <charset>")
        return 1

    src_dir = argv[1]
    if not os.path.isdir(src_dir):
        print("'%s' is not a directory" % src_dir)
        return 1

    encoding = argv[2]
    try:
        codecs.lookup(encoding)
    except LookupError:
        print("'%s' is not a valid encoding" % encoding)
        return 1

    convert_tree(src_dir, encoding)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment