Created June 22, 2023 03:23
-
-
Save koralle/9dde1002533bcb1e3410f0a23b2ab3eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures
import glob
import logging
import os

import chardet
import fire
from tqdm import tqdm
# Entries that main() filters out of the list of files to convert.
# Replace the placeholders below with the real entries to ignore.
ignore_entries = [
    "ignore1",
    "ignore2",
    "...",
]
class FileConverter:
    """Re-encode a text file to UTF-8 and turn its CRLF line endings into LF.

    The source encoding is guessed with ``chardet``. Files are only rewritten
    on disk once overwriting has been enabled with :meth:`set_overwrite`;
    until then :meth:`convert_files` performs the conversion in memory only.
    """

    # Reports files that are skipped instead of converted.
    _log = logging.getLogger(__name__)

    def __init__(self):
        # Dry run by default: nothing on disk is modified.
        self.overwrite = False

    def set_overwrite(self, _):
        """Enable in-place rewriting of converted files.

        The argument is ignored: calling this method always switches
        overwriting on, whatever value is passed.
        """
        self.overwrite = True

    def convert_files(self, path):
        """Convert the single file at *path* to UTF-8 with LF line endings.

        Files that are empty, that chardet already reports as UTF-8, whose
        encoding cannot be detected, or that cannot be decoded with the
        detected encoding are left untouched.

        Args:
            path: Path of the file to convert.

        Returns:
            None.

        Raises:
            OSError: If the file cannot be read or written.
        """
        # Read the bytes once; they feed both detection and decoding.
        with open(path, 'rb') as f:
            raw = f.read()
        if not raw:
            return  # Empty file: nothing to detect or convert.

        detected_encoding = chardet.detect(raw)['encoding']
        if detected_encoding is None:
            # No usable guess. Decoding with a fallback encoding and writing
            # the result back could corrupt the file, so leave it alone.
            self._log.warning("Skipping %s: encoding could not be detected", path)
            return
        if detected_encoding.lower() == 'utf-8':
            return

        try:
            text = raw.decode(detected_encoding)
        except (UnicodeDecodeError, LookupError) as exc:
            # A wrong guess or an encoding name Python has no codec for must
            # not abort the conversion of every other file.
            self._log.warning(
                "Skipping %s: cannot decode as %s (%s)", path, detected_encoding, exc
            )
            return

        content = text.replace('\r\n', '\n')
        if self.overwrite:
            # newline='\n' stops Python from translating '\n' on write.
            with open(path, 'w', encoding='utf-8', newline='\n') as f:
                f.write(content)
def _is_ignored(path, base_path, ignored):
    """Return True when *path* itself or any of its components is ignored."""
    if path in ignored:
        return True
    # glob yields paths prefixed with base_path, so compare the components
    # below it (directory names and the file name) against the ignore set.
    relative_parts = os.path.relpath(path, base_path).split(os.sep)
    return any(part in ignored for part in relative_parts)


def main(overwrite=False, directory=None):
    """Convert every file below a directory using a pool of worker threads.

    Args:
        overwrite: When truthy, converted files are rewritten in place;
            otherwise the conversion runs without modifying any file.
        directory: Root directory to scan recursively. Defaults to the
            directory containing this script.

    Files are skipped when their path, or any path component below the root,
    appears in ``ignore_entries``. Pressing Ctrl-C cancels the files that have
    not been started yet; conversions already running are allowed to finish.
    """
    file_converter = FileConverter()
    if overwrite:
        file_converter.set_overwrite(True)

    base_path = directory if directory else os.path.dirname(os.path.realpath(__file__))
    search_path = os.path.join(base_path, '**')
    ignored = set(ignore_entries)
    target_files = [
        path
        for path in glob.glob(search_path, recursive=True)
        if os.path.isfile(path) and not _is_ignored(path, base_path, ignored)
    ]

    # Use a thread pool to process files in parallel. The with-block waits for
    # running workers before returning.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        try:
            for path in target_files:
                futures.append(executor.submit(file_converter.convert_files, path))
            completed = concurrent.futures.as_completed(futures)
            for future in tqdm(completed, total=len(futures)):
                future.result()  # Re-raise any error from the worker.
        except KeyboardInterrupt:
            # Drop queued work so the pool really stops; cancel() is a no-op
            # for futures that are already running or finished.
            for future in futures:
                future.cancel()
            print("\nProcess interrupted by user. Exiting...")
# Script entry point: hand main() to fire.Fire, which exposes it as the
# command-line interface.
if __name__ == "__main__":
    fire.Fire(main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.