Skip to content

Instantly share code, notes, and snippets.

@izackwu
Created February 26, 2020 12:25
Show Gist options
  • Save izackwu/0442c6bf8f408649881e5d7f4ce6714e to your computer and use it in GitHub Desktop.
Save izackwu/0442c6bf8f408649881e5d7f4ce6714e to your computer and use it in GitHub Desktop.
Convert a file to UTF-8 encoding automatically with Python
import chardet
import sys
def convert_to_utf8(raw_content, encoding=None):
    """Decode raw bytes to text, falling back to GBK when needed.

    Despite the name, this returns decoded text (``str``), not UTF-8
    bytes; writing that text out as UTF-8 is left to the caller.

    Args:
        raw_content: The bytes to decode.
        encoding: Name of the encoding to try first. When omitted (or
            falsy), the encoding is guessed with ``chardet.detect``.

    Returns:
        The decoded ``str``, or ``None`` if neither the chosen/detected
        encoding nor GBK can decode ``raw_content``.
    """
    encoding = encoding or chardet.detect(raw_content)["encoding"]
    # chardet may report no encoding at all (e.g. empty or undetectable
    # input); passing None to bytes.decode() would raise TypeError, so go
    # straight to the fallback in that case.
    if encoding:
        try:
            return raw_content.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the encoding name has no codec in this Python.
            # Treat it like any other failed attempt instead of crashing.
            pass
    print("\tFailed to decode with {} encoding".format(encoding))
    print("\tTry it with GBK")
    try:
        return raw_content.decode("GBK")
    except UnicodeDecodeError:
        print("\tGBK won't work either")
        return None
def main(argv):
    """Re-encode each file named on the command line as UTF-8, in place.

    Args:
        argv: Command-line argument list; ``argv[0]`` is skipped and every
            remaining item is treated as a file path.

    For each path, a progress line is printed, the file is read as bytes,
    decoded via ``convert_to_utf8`` and, if decoding succeeded, written
    back to the same path as UTF-8. "Success" or "Failure" is printed
    per file; a file that cannot be decoded is left untouched.
    """
    file_paths = argv[1:]
    total = len(file_paths)
    for index, file_path in enumerate(file_paths, start=1):
        print("{:d}/{:d}: {}".format(index, total, file_path))
        with open(file_path, mode="rb") as f:
            raw = f.read()
        converted = convert_to_utf8(raw)
        if converted is None:
            print("Failure")
            continue
        # newline="" turns off newline translation on write. The text was
        # decoded from raw bytes, so it still holds the file's original
        # line endings; without this, "\r\n" would be written as "\r\r\n"
        # on platforms whose line separator is "\r\n".
        with open(file_path, mode="w", encoding="utf8", newline="") as f:
            f.write(converted)
        print("Success")
# Script entry point: hand the full command-line argument list to main()
# only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    cli_args = sys.argv
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment