Created
January 5, 2023 16:10
-
-
Save tomers/32e49c06321cb6673940b886ddf3dec7 to your computer and use it in GitHub Desktop.
Haifa Vadis XLSX to CSV file converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import csv | |
import re | |
from itertools import islice | |
from pathlib import Path | |
import openpyxl | |
import phonenumbers | |
import validators | |
def convert_xlsx(xlsx: Path, prefix, csv_all): | |
print(f"Converting {xlsx}") | |
# read Excel file | |
workbook = openpyxl.load_workbook(filename=xlsx) | |
sheet = workbook.active | |
# delete unwanted columns | |
sheet.delete_cols(idx=4, amount=2) | |
# rename columns | |
sheet['A1'] = 'First Name' | |
sheet['B1'] = 'Mobile Phone' | |
sheet['C1'] = 'E-mail Address' | |
# add prefix to name | |
for row in range(2, sheet.max_row + 1): | |
cell = f'A{row}' | |
name = sheet[cell].value | |
sheet[cell] = f'{str(prefix) * 3} {name}' | |
# normalize phone numbers | |
for row in range(2, sheet.max_row + 1): | |
cell = f'B{row}' | |
value = str(sheet[cell].value) | |
phone = None | |
try: | |
phone = phonenumbers.parse(value, 'IL') | |
phone = phonenumbers.format_number( | |
phone, phonenumbers.PhoneNumberFormat.INTERNATIONAL) | |
except phonenumbers.phonenumberutil.NumberParseException as e: | |
print(f"Failed parsing cell {cell} with value {value}: {e}") | |
sheet[cell] = str(phone) | |
# normalize emails | |
for row in range(2, sheet.max_row + 1): | |
cell = f'C{row}' | |
value = str(sheet[cell].value) | |
sheet[cell] = value if validators.email(value) else None | |
# normalize cells | |
for column in range(1, sheet.max_column + 1): | |
for row in range(2, sheet.max_row + 1): | |
cell = sheet.cell(row=row, column=column) | |
cell.value = re.sub(r'\s+', ' ', str(cell.value).strip()) | |
# store CSV file | |
csv_output = Path(str(xlsx) + '.csv') | |
with open(csv_output, 'w', newline='') as f: | |
c = csv.writer(f) | |
for r in islice(sheet.rows, 1, None): | |
c.writerow([cell.value for cell in r]) | |
# append to global CSV file | |
c = csv.writer(csv_all) | |
for r in islice(sheet.rows, 1, None): | |
c.writerow([cell.value for cell in r]) | |
def main(): | |
input_files = list(Path.cwd().glob('*.xlsx')) | |
with open('all.csv', 'w', newline='') as f: | |
for i, xlsx in enumerate(input_files): | |
convert_xlsx(xlsx, i, f) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment