Skip to content

Instantly share code, notes, and snippets.

@chaomai
Created September 8, 2012 09:16
Show Gist options
  • Save chaomai/3672964 to your computer and use it in GitHub Desktop.
Save chaomai/3672964 to your computer and use it in GitHub Desktop.
Python 3: detect encoding
# Detect the character encoding of files.
# Tested under Python 3.
from chardet.universaldetector import UniversalDetector
import os
import os.path
def detect(filename, filetype, out_enc='utf-8'):
    """Print the character encoding that chardet guesses for one file.

    The file is examined only when its extension, as produced by
    ``os.path.splitext`` (so including the leading dot), equals
    *filetype*. Any other file is skipped silently.

    Args:
        filename: Path of the file to inspect.
        filetype: Required extension, for example ``'.txt'``.
        out_enc: Not used by the detection itself; kept so that existing
            callers which pass it keep working.

    Returns:
        None. The outcome is reported on standard output.
    """
    # Guard clause: ignore files whose extension does not match.
    if os.path.splitext(os.path.basename(filename))[1] != filetype:
        return

    # Keep the try body down to the I/O that can actually fail, and let the
    # context manager close the file exactly once on every path.
    try:
        with open(filename, 'rb') as f:
            # A 1024-byte sample is all that is fed to the detector, so the
            # rest of the file is never loaded into memory.
            sample = f.read(1024)
    except IOError as err:
        print('Error:' + filename + ' could not be read: ' + str(err))
        return

    # Feed the bytes exactly as read: the sample has to begin at the first
    # byte of the file so that a leading byte-order mark, if any, is still at
    # the start of the buffer when the detector inspects it.
    detector = UniversalDetector()
    detector.feed(sample)
    detector.close()

    in_enc = detector.result['encoding']
    if in_enc is None:
        # No guess available (for example an empty file); report that rather
        # than failing on str + None.
        print(filename + ' is unknown')
    else:
        print(filename + ' is ' + in_enc)
def find_and_operate(sou_dir, filetype, isloop_subdir = True):
    """Run ``detect`` on each regular file found in *sou_dir*.

    Args:
        sou_dir: Directory whose entries are scanned.
        filetype: Extension handed through to ``detect``. An empty string
            aborts the scan with a message on standard output.
        isloop_subdir: When true, sub-directories are scanned recursively
            with the same settings; when false they are ignored.

    Returns:
        None.
    """
    if filetype == '':
        print('filetype is not defined')
        print('Quit')
        return

    for item in os.listdir(sou_dir):
        path = os.path.join(sou_dir, item)
        if os.path.isfile(path):
            detect(path, filetype, 'utf-8')
        elif isloop_subdir and os.path.isdir(path):
            # Recurse only into real directories: entries that are neither
            # a file nor a directory (dangling symlinks, FIFOs, sockets)
            # would make os.listdir raise.
            find_and_operate(path, filetype, isloop_subdir)
def main():
    """Prompt for the scan settings on standard input and start the scan.

    Asks, in order, for the directory, whether to descend into
    sub-directories, and the file suffix, then passes the three answers to
    ``find_and_operate``.
    """
    directory = input('directory:')
    flag = input('is loop sub dir(t or f, Default is True):')
    # An empty answer or 't' enables recursion; anything else disables it.
    # Surrounding whitespace and letter case are ignored so that 'T' or
    # 't ' are not silently read as "false".
    isloop_subdir = flag.strip().lower() in ('', 't')
    filetype = input('file suffix(must defined):')
    find_and_operate(directory, filetype, isloop_subdir)
# Run the interactive prompt only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment