Skip to content

Instantly share code, notes, and snippets.

@denisxab
Created October 7, 2022 15:17
Show Gist options
  • Save denisxab/9c3d1cfbcbabed5fb40a9b2d0cd971a6 to your computer and use it in GitHub Desktop.
Save denisxab/9c3d1cfbcbabed5fb40a9b2d0cd971a6 to your computer and use it in GitHub Desktop.
Конвертация кодировки у файлов
import os
from pathlib import Path
import codecs
import chardet
from pathlib import Path
def get_encoding(path: str) -> str:
    """
    Detect the encoding of a file so that it can be read correctly.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    value stored under the ``'encoding'`` key of its result is returned
    (``.get`` yields ``None`` when that key is absent).
    """
    raw_bytes = Path(path).read_bytes()
    detection = chardet.detect(raw_bytes)
    return detection.get('encoding')
def convert_charset_file(in_path, out_path=None, in_encode=None, out_encode='utf-8'):
    """
    Re-encode a text file from one character encoding to another.

    Args:
        in_path: Path of the file to read.
        out_path: Path of the file to write. When omitted, the result is
            written to an ``out`` directory located next to the source file,
            under the same file name.
        in_encode: Encoding of the source file. When omitted, it is detected
            with ``get_encoding``.
        out_encode: Encoding of the written file.

    Raises:
        ValueError: If no source encoding was given and none could be detected.
        LookupError: If either encoding name is unknown to ``codecs``.

    The file is skipped (nothing is written or printed) when both encoding
    names refer to the same codec.
    """
    in_file = Path(in_path)
    if out_path:
        out_file = Path(out_path)
    else:
        out_file = in_file.resolve().parent / "out" / in_file.name

    if not in_encode:
        in_encode = get_encoding(in_file)
    if not in_encode:
        raise ValueError(f"Could not determine the encoding of {in_path}")

    # Compare canonical codec names so that aliases ("UTF-8", "utf8",
    # "utf-8") are recognised as the same encoding.
    if codecs.lookup(in_encode).name == codecs.lookup(out_encode).name:
        return

    # The destination directory (notably the default "out") may not exist yet.
    out_file.parent.mkdir(parents=True, exist_ok=True)

    block_size = 1048576  # characters per read; keeps memory use bounded
    # newline="" disables newline translation so the original line endings
    # are carried over unchanged.
    with open(in_file, "r", encoding=in_encode, newline="") as source_file, \
            open(out_file, "w", encoding=out_encode, newline="") as target_file:
        for chunk in iter(lambda: source_file.read(block_size), ""):
            target_file.write(chunk)

    print(f"Перезаписан {in_path} из {in_encode} > {out_encode}")
def convert_charset_dir(in_path, in_encode='windows-1251', out_encode='utf-8'):
    """
    Re-encode every regular file located directly inside a directory.

    Sub-directories are not descended into. Each file is passed to
    ``convert_charset_file`` with its default output location.

    Args:
        in_path: Directory whose files are converted.
        in_encode: Encoding assumed for every source file. A falsy value is
            forwarded as-is, which makes ``convert_charset_file`` detect the
            encoding per file.
        out_encode: Encoding of the written files.
    """
    # Sorted so that files are processed (and reported) in a stable order.
    for entry in sorted(Path(in_path).iterdir()):
        if entry.is_file():
            convert_charset_file(entry, in_encode=in_encode, out_encode=out_encode)
# Script entry point: convert every regular file directly inside the "in"
# directory (relative to the current working directory) from windows-1251 to
# UTF-8; the results are written to "in/out/". Note that this call also runs
# whenever the module is imported.
convert_charset_dir('in')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment