Skip to content

Instantly share code, notes, and snippets.

@hanx11
Created October 19, 2019 12:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hanx11/ad614cd52ead1a207283f8fa79480d6d to your computer and use it in GitHub Desktop.
Save hanx11/ad614cd52ead1a207283f8fa79480d6d to your computer and use it in GitHub Desktop.
# -*- coding:utf-8 -*-
import codecs
from datetime import datetime
# Path of the input file; main() opens it with the GBK codec.
source_file = "big_gbk_file.csv"
# Path of the output file; main() writes it with the UTF-8 codec.
target_file = "big_utf8_file.csv"
# Size passed to each read() call (10000 KiB); any other positive size works.
block_size = 1024 * 10000
def main(source=None, target=None, chunk_size=None):
    """Transcode a GBK-encoded file to UTF-8 without loading it all at once.

    The input is read and re-written chunk by chunk so that arbitrarily
    large files can be converted with bounded memory. When finished, the
    elapsed wall-clock time is printed.

    Args:
        source: Path of the GBK input file. Defaults to the module-level
            ``source_file``.
        target: Path of the UTF-8 output file (overwritten if it exists).
            Defaults to the module-level ``target_file``.
        chunk_size: Size passed to each ``read()`` call. Defaults to the
            module-level ``block_size``.

    Raises:
        ValueError: If the chunk size is not a positive number.
        UnicodeDecodeError: If the input contains bytes that are not valid GBK.
    """
    src_path = source_file if source is None else source
    dst_path = target_file if target is None else target
    size = block_size if chunk_size is None else chunk_size

    # Validate before opening anything: read(0) returns an empty string
    # immediately, which would silently leave a truncated, empty output file.
    if size <= 0:
        raise ValueError('chunk_size must be positive, got {!r}'.format(size))

    start_time = datetime.now()
    # codecs.open always uses binary mode underneath, so line endings are
    # passed through untouched; only the character encoding changes.
    with codecs.open(src_path, "r", "GBK") as reader, \
            codecs.open(dst_path, "w", "utf-8") as writer:
        while True:
            content = reader.read(size)
            if not content:
                # An empty read means end of file.
                break
            writer.write(content)
    end_time = datetime.now()
    print('Cost time: {}'.format(end_time - start_time))
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment