Created
April 12, 2024 01:54
-
-
Save xtornasol512/c1e2a4f46b931b03e2c483d1cfa52bdc to your computer and use it in GitHub Desktop.
Convert UTF-8 files to a specific encoding (in this case, cp1252).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import unicodedata | |
import sys | |
def normalize_and_convert(input_filepath, output_filepath):
    """Convert a UTF-8 CSV file into a CP1252-encoded CSV file.

    Every cell is normalized to Unicode NFC before it is written, so a
    decomposed sequence (base letter + combining mark) collapses into the
    single precomposed character that CP1252 is able to represent.

    Args:
        input_filepath: Path of the UTF-8 CSV file to read. A leading
            byte-order mark, if present, is skipped.
        output_filepath: Path of the CP1252 CSV file to write. An existing
            file at this path is overwritten.

    Characters that have no CP1252 equivalent are written as ``?``
    (``errors='replace'``) instead of raising ``UnicodeEncodeError``.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM.
    # With plain 'utf-8' the BOM survives as U+FEFF in the first cell and,
    # being unencodable in CP1252, ends up as a stray '?' in the output.
    # newline='' on both files lets the csv module manage line endings.
    with open(input_filepath, mode='r', encoding='utf-8-sig', newline='') as infile, \
            open(output_filepath, mode='w', encoding='cp1252',
                 errors='replace', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        # Stream row by row so large files are never held fully in memory.
        writer.writerows(
            [unicodedata.normalize('NFC', cell) for cell in row]
            for row in reader
        )
def main():
    """Command-line entry point: convert <input_file> into <output_file>.

    Expects exactly two positional arguments. With any other count it
    prints a usage line and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python3 converting.py <input_file> <output_file>")
        sys.exit(1)

    source_path, target_path = cli_args
    # Hand both paths to the converter, which does the actual work.
    normalize_and_convert(source_path, target_path)
# Run the command-line interface only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment