Skip to content

Instantly share code, notes, and snippets.

@coder-tan
Created March 3, 2016 08:47
Show Gist options
  • Save coder-tan/cd127c6563dc702b86bf to your computer and use it in GitHub Desktop.
Save coder-tan/cd127c6563dc702b86bf to your computer and use it in GitHub Desktop.
convert text file encodings
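# Convert text files from one encoding to another (GB18030 -> UTF-8 by default), mirroring a 'source' directory tree into a 'target' tree.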
import codecs
import os
from shutil import copyfile
# Codec names for the encodings this script deals with.
encoding_china = 'gb18030'
encoding_gbk = 'gbk'  # alternative codec name; not referenced by the code visible in this file
# Encoding the input files are decoded from, and the encoding the output is written in.
encoding_source = encoding_china
encoding_target = 'utf-8'
def con(source, target, source_encoding=None, target_encoding=None):
    """Re-encode the text file ``source`` into the file ``target``.

    The file is streamed in fixed-size chunks, so large files are never held
    in memory as a whole. If the input cannot be decoded with the source
    encoding, ``source`` is copied to ``target`` byte-for-byte instead.

    Args:
        source: Path of the file to read.
        target: Path of the file to write (created or overwritten).
        source_encoding: Codec used to decode ``source``. Defaults to the
            module-level ``encoding_source``.
        target_encoding: Codec used to encode ``target``. Defaults to the
            module-level ``encoding_target``.

    Raises:
        OSError: If either file cannot be opened, read, written or copied.
    """
    # Resolve the defaults at call time so the module-level settings still
    # apply for callers that pass only (source, target).
    if source_encoding is None:
        source_encoding = encoding_source
    if target_encoding is None:
        target_encoding = encoding_target

    blocksize = 1048576  # characters per read; bounds memory use
    try:
        # newline="" switches off newline translation in both directions, so
        # the file's original line endings are written back unchanged.
        with open(source, "r", encoding=source_encoding, newline="") as src, \
                open(target, "w", encoding=target_encoding, newline="") as dst:
            # read() returns "" at end of file, which ends the iteration.
            for chunk in iter(lambda: src.read(blocksize), ""):
                dst.write(chunk)
    except UnicodeDecodeError:
        # Not valid text in the source encoding: fall back to a verbatim copy.
        # Both files are already closed here, so any partial output is
        # simply overwritten.
        copyfile(source, target)
def test():
    """Convert the hard-coded sample tree, then report file counts.

    Runs ``conn`` on the sample source directory and afterwards calls
    ``check`` on the source tree and on its 'target' counterpart, so the two
    printed counts can be compared by eye.
    """
    source = '/Users/tan/tan/dev/python/dian/source'
    target = source.replace('source', 'target')
    conn(source)
    for tree in (source, target):
        check(tree)
def check(path):
    """Print the number of files that ``count_dir`` collects under ``path``."""
    collected = []
    count_dir(collected, path)
    total = len(collected)
    print(total)
def conn(path, target=None):
    """Recursively convert ``path`` into a mirrored target location.

    A file is converted with ``con`` and its path is printed. Anything that
    is not a regular file is treated as a directory: the matching target
    directory is created and every entry is processed recursively.

    Args:
        path: File or directory to convert.
        target: Destination path for ``path``. When omitted it is derived by
            replacing 'source' with 'target' in ``path``. The derivation is
            done only for the top-level call; nested entries are joined onto
            the parent's target, so entry names that happen to contain
            'source' (e.g. ``resources``) keep their names.

    Raises:
        ValueError: If the target resolves to the same path as the source,
            which would truncate the input while it is being read.
        OSError: If a directory cannot be listed or created.
    """
    if target is None:
        target = path.replace('source', 'target')
    # Writing onto the input would empty it before it is read.
    if os.path.abspath(target) == os.path.abspath(path):
        raise ValueError(
            'target path is the same as source path: %r' % (path,))

    if os.path.isfile(path):
        con(path, target)
        print(path)
        return

    # exist_ok avoids the check-then-create race and also creates any
    # missing parent directories.
    os.makedirs(target, exist_ok=True)
    for name in os.listdir(path):
        conn(os.path.join(path, name), os.path.join(target, name))
def count_dir(files, file):
    """Append the path of every file found at or below ``file`` to ``files``.

    ``file`` is appended itself when it is a regular file; otherwise it is
    listed as a directory and each entry is visited recursively. Paths are
    built by joining with '/'. The list is modified in place and nothing is
    returned.
    """
    if os.path.isfile(file):
        files.append(file)
        return
    for entry in os.listdir(file):
        count_dir(files, '/'.join((file, entry)))
# Run the sample conversion only when executed as a script, so importing
# this module does not touch the filesystem.
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment