Skip to content

Instantly share code, notes, and snippets.

@koralle
Created June 22, 2023 03:23
Show Gist options
  • Save koralle/9dde1002533bcb1e3410f0a23b2ab3eb to your computer and use it in GitHub Desktop.
Save koralle/9dde1002533bcb1e3410f0a23b2ab3eb to your computer and use it in GitHub Desktop.
import glob
import os
import chardet
import concurrent.futures
import fire
from tqdm import tqdm
# Entries that main() excludes when it builds the list of files to convert.
# The values below are placeholders; replace them with real entries.
ignore_entries = ["ignore1", "ignore2", "..."] # add entries to ignore
class FileConverter:
    """Re-encode text files as UTF-8 with LF line endings."""

    def __init__(self):
        # When False (the default) convert_files() decodes and normalizes the
        # file in memory but never writes the result back to disk.
        self.overwrite = False

    def set_overwrite(self, _):
        """Enable or disable writing converted content back to disk.

        Args:
            _: Truthy to enable overwriting, falsy to disable it. The
                parameter keeps its historical name for compatibility, but
                its value is now honored instead of being ignored.
        """
        self.overwrite = bool(_)

    def convert_files(self, path):
        """Convert a single file to UTF-8 with LF line endings.

        The encoding is guessed by chardet from the file's raw bytes. Files
        detected as UTF-8, and files for which no encoding could be guessed,
        are left untouched (including their line endings). Any other file is
        decoded, has every CRLF replaced by LF, and -- only when
        ``self.overwrite`` is true -- is rewritten in place as UTF-8.

        Args:
            path: Path of the file to convert.

        Raises:
            OSError: If the file cannot be read or written.
            UnicodeDecodeError: If the bytes are not valid in the detected
                encoding.
            LookupError: If Python has no codec for the detected encoding.
        """
        # Read the bytes once; they serve both detection and decoding.
        with open(path, 'rb') as f:
            raw = f.read()

        detected_encoding = chardet.detect(raw)['encoding']

        # chardet reports None when it cannot guess an encoding (for example
        # for empty files). Decoding with None would silently fall back to
        # the locale encoding, so such files are skipped instead.
        if detected_encoding is None or detected_encoding.lower() == 'utf-8':
            return

        content = raw.decode(detected_encoding).replace('\r\n', '\n')

        if self.overwrite:
            # newline='\n' disables newline translation on write, so the LF
            # endings produced above are stored exactly as-is.
            with open(path, 'w', encoding='utf-8', newline='\n') as f:
                f.write(content)
def _is_ignored(path, base_path, entries):
    """Return True if *path* matches one of the ignore *entries*.

    A path is ignored when it equals an entry verbatim, or when any
    component of the path relative to *base_path* (a directory name or the
    file name) equals an entry.
    """
    if path in entries:
        return True
    relative = os.path.relpath(path, base_path)
    return any(part in entries for part in relative.split(os.sep))


def main(overwrite=False, directory=None):
    """Convert every file under a directory tree using FileConverter.

    Args:
        overwrite: When true, converted content is written back to the
            files; otherwise the files are only read and decoded.
        directory: Root directory to scan recursively. Defaults to the
            directory containing this script.

    An exception raised while converting a file propagates out of this
    function and stops the run. Ctrl-C stops the run with a short message.
    """
    file_converter = FileConverter()
    if overwrite:
        file_converter.set_overwrite(True)

    base_path = directory if directory else os.path.dirname(os.path.realpath(__file__))
    search_path = os.path.join(base_path, '**')

    # glob returns full paths, so comparing them directly with bare names
    # such as "ignore1" would never match; compare path components instead.
    ignored = set(ignore_entries)
    target_files = [
        file
        for file in glob.glob(search_path, recursive=True)
        if os.path.isfile(file) and not _is_ignored(file, base_path, ignored)
    ]

    # Use a thread pool to process files in parallel
    executor = concurrent.futures.ThreadPoolExecutor()
    wait_for_workers = True
    try:
        results = executor.map(file_converter.convert_files, target_files)
        for _ in tqdm(results, total=len(target_files)):
            pass  # Consuming the iterator drives the progress bar and surfaces errors
    except KeyboardInterrupt:
        # Do not block on conversions that are still running.
        wait_for_workers = False
        print("\nProcess interrupted by user. Exiting...")
    finally:
        # Always release the pool, including when a conversion raised.
        executor.shutdown(wait=wait_for_workers)
# When run as a script, hand main() to python-fire via fire.Fire.
if __name__ == "__main__":
    fire.Fire(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment