Skip to content

Instantly share code, notes, and snippets.

@unsafe9
Last active October 1, 2022 07:59
Show Gist options
  • Save unsafe9/b2893b73e4781cfb82533cbbf3773945 to your computer and use it in GitHub Desktop.
Save unsafe9/b2893b73e4781cfb82533cbbf3773945 to your computer and use it in GitHub Desktop.
import glob
import chardet # pip install chardet
def decode(d, preferred_encodings):
    """Decode the byte string *d* to text.

    Each codec name in *preferred_encodings* is tried in order and the first
    one that decodes *d* without error wins.  chardet is consulted only when
    every preferred codec has failed.

    Args:
        d: Raw bytes to decode.
        preferred_encodings: Iterable of codec names, in priority order.

    Returns:
        The decoded ``str``.

    Raises:
        UnicodeDecodeError: If no preferred encoding fits and chardet either
            cannot name an encoding or names one that fails to decode *d*.
    """
    for encoding in preferred_encodings:
        try:
            return d.decode(encoding)
        except UnicodeDecodeError:
            continue

    # chardet is not so smart, so it is only used as the last resort.
    guessed = chardet.detect(d).get("encoding")
    if guessed is None:
        # chardet reports an encoding of None when it cannot identify the
        # input; handing that to bytes.decode() would fail with an unrelated
        # TypeError, so report a proper decoding failure instead.
        raise UnicodeDecodeError(
            "unknown", d, 0, len(d), "unable to detect the encoding"
        )
    return d.decode(guessed)
def main():
    """Re-encode every ``*.txt`` file in the current directory as UTF-8.

    Each file is read as raw bytes, decoded with ``decode`` and written back
    in place.  Files whose bytes would not change are left untouched.
    """
    for path in glob.glob("*.txt"):
        with open(path, "rb") as f:
            raw = f.read()

        # UTF-8 is tried before cp949: bytes that are already valid UTF-8 can
        # also happen to decode as cp949 (into unrelated Hangul), so trying
        # cp949 alone could turn an already-converted file into mojibake when
        # the script is run a second time.
        text = decode(raw, preferred_encodings=["utf-8", "cp949"])
        converted = text.encode("utf-8")

        if converted == raw:
            # Already UTF-8 (or pure ASCII); skip the needless truncate+write.
            continue

        with open(path, "wb") as f:
            f.write(converted)
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment