Skip to content

Instantly share code, notes, and snippets.

@xtornasol512
Created April 12, 2024 01:54
Show Gist options
  • Save xtornasol512/c1e2a4f46b931b03e2c483d1cfa52bdc to your computer and use it in GitHub Desktop.
Save xtornasol512/c1e2a4f46b931b03e2c483d1cfa52bdc to your computer and use it in GitHub Desktop.
Convert UTF-8 CSV files to another encoding (in this case, cp1252)
import csv
import unicodedata
import sys
def normalize_and_convert(input_filepath, output_filepath) -> None:
    """Re-encode a UTF-8 CSV file as CP1252, normalizing every cell to NFC.

    Each cell is passed through ``unicodedata.normalize('NFC', ...)`` so that
    decomposed sequences (a base letter followed by a combining accent) are
    composed into a single code point before encoding; CP1252 has no
    combining marks, so without this step accented letters would be lost.
    Characters that still cannot be represented in CP1252 are written as
    ``?`` (the ``errors='replace'`` handler) instead of aborting the run.

    Args:
        input_filepath: Path of the UTF-8 encoded CSV file to read. A leading
            byte-order mark, if present, is skipped.
        output_filepath: Path of the CP1252 CSV file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM; with
    # plain 'utf-8' the BOM would stay in the first cell and, being
    # unencodable in CP1252, come out as a stray '?' at the start of the file.
    # newline='' on both files lets the csv module handle line endings itself.
    with open(input_filepath, mode='r', encoding='utf-8-sig', newline='') as infile, \
            open(output_filepath, mode='w', encoding='cp1252',
                 errors='replace', newline='') as outfile:
        writer = csv.writer(outfile)
        # Stream row by row so large files are never held fully in memory.
        writer.writerows(
            [unicodedata.normalize('NFC', cell) for cell in row]
            for row in csv.reader(infile)
        )
def main():
    """Command-line entry point: convert the CSV named by the two arguments.

    Expects exactly two command-line arguments, the input path and the
    output path. On any other argument count it prints a usage line to
    standard error and exits with status 1.
    """
    # sys.argv[0] is the script name, so two user arguments means length 3.
    if len(sys.argv) != 3:
        # Diagnostics belong on stderr so they never mix with piped output.
        print("Usage: python3 converting.py <input_file> <output_file>",
              file=sys.stderr)
        sys.exit(1)

    _, input_file, output_file = sys.argv

    # Run the function to normalize and convert the CSV file
    normalize_and_convert(input_file, output_file)
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment