Skip to content

Instantly share code, notes, and snippets.

@hjhjw1991
Created May 13, 2016 10:44
Show Gist options
  • Save hjhjw1991/e4eb3fc1c7f7b3b25c77652072ab29aa to your computer and use it in GitHub Desktop.
Save hjhjw1991/e4eb3fc1c7f7b3b25c77652072ab29aa to your computer and use it in GitHub Desktop.
convert text file to specific encoding
#!/usr/bin/env python
#-*-encoding:utf-8-*-
import chardet
import argparse
import os
# Command-line interface. Arguments are parsed at module level and the result
# is kept in the module-global `args`.
parser = argparse.ArgumentParser()
parser.add_argument("dir", help="directory where all files will be converted")
parser.add_argument("-r", "--recursive", action="store_true", help="do convertion recursively")
# default="" so that an omitted -e means "every file" instead of handing None
# down as the extension filter.
parser.add_argument("-e", "--extension", default="", help="only convert files with specific extension")
parser.add_argument("-E", "--encoding", help="convert files to specific encoding, supporting utf-8, gbk and unicode, default is utf-8")
args = parser.parse_args()
# Anything missing or outside the accepted list silently falls back to UTF-8.
# NOTE(review): 'unicode' is accepted here but is not a Python codec name, so
# encoding to it will be rejected later -- confirm what was intended (UTF-16?).
if args.encoding is None or args.encoding.lower() not in ['utf-8', 'gbk', 'unicode', 'gb2312']:
    args.encoding = 'utf-8'
## must be absolute path
def getFiles(dir, ext="txt"):
    """List the direct children of ``dir``, split into directories and files.

    dir -- directory to scan. Returned paths are ``dir`` joined with each
           entry name, so they are absolute only when ``dir`` is.
    ext -- filename suffix a file must end with to be listed; ``""`` or
           ``None`` accepts every file.

    Returns a ``(dirs, files)`` tuple of path lists. Both lists are empty
    when ``dir`` does not exist or is not a directory, so callers can always
    unpack the result.
    """
    dirs = []
    files = []
    # isdir() is False for missing paths as well as for regular files.
    if not os.path.isdir(dir):
        return (dirs, files)
    suffix = ext or ""
    for name in os.listdir(dir):
        path = os.path.join(dir, name)
        if os.path.isdir(path):
            dirs.append(path)
        elif path.endswith(suffix):
            # endswith("") is always True, which gives the "no filter" case.
            files.append(path)
    return (dirs, files)
def convert(filepath, encoding):
    """Re-encode one file, writing the result to ``<filepath>.<encoding>``.

    filepath -- file to read; anything that is not a regular file is ignored.
    encoding -- codec name used for the output and as the new file suffix.

    The source encoding is guessed with chardet, falling back to GBK when no
    guess is available. On a decode/encode failure (including an unknown
    codec name) a message is printed and no output file is created.
    Returns None.
    """
    if not os.path.isfile(filepath):
        return
    # Binary mode: detection and decoding need the raw, untranslated bytes.
    with open(filepath, "rb") as src:
        raw = src.read()
    # chardet stores None under 'encoding' when it cannot guess, so a
    # dict.get() default would never kick in; use `or` for the fallback.
    detected = chardet.detect(raw).get('encoding') or 'GBK'
    try:
        converted = raw.decode(detected).encode(encoding)
    except (UnicodeError, LookupError):
        print("convert error  %s" % filepath)
        return
    # Open the target only after conversion succeeded, so a failure does not
    # leave an empty output file behind.
    with open(filepath + "." + encoding, "wb") as dst:
        dst.write(converted)
def convertAll(files, encoding):
    """Run convert() on each path in ``files`` with the same target encoding."""
    for target in files:
        convert(filepath=target, encoding=encoding)
def handle(arg):
    """Convert the files in ``arg.dir`` and, if requested, in all sub-directories.

    arg -- parsed options; reads ``dir``, ``extension``, ``encoding`` and
           ``recursive``.

    Directories are visited level by level: the starting directory first,
    then its sub-directories, then theirs, and so on. Without
    ``arg.recursive`` only the starting directory is processed.
    """
    level = [arg.dir]
    while level:
        deeper = []
        for current in level:
            listing = getFiles(current, arg.extension)
            if listing is None:
                # getFiles() may return None for a path it cannot list;
                # unpacking that would raise an opaque TypeError.
                print("not a directory: %s" % current)
                continue
            subdirs, files = listing
            convertAll(files, arg.encoding)
            deeper.extend(subdirs)
        # Descend only when recursion was asked for.
        level = deeper if arg.recursive else []
# Script entry point: run the conversion with the parsed command-line options.
if __name__ == "__main__":
    handle(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment