Skip to content

Instantly share code, notes, and snippets.

@xlbruce
Last active April 30, 2021 15:02
Show Gist options
  • Save xlbruce/1394b96e6985ff3e5247d82ee6ea77e7 to your computer and use it in GitHub Desktop.
Save xlbruce/1394b96e6985ff3e5247d82ee6ea77e7 to your computer and use it in GitHub Desktop.
Simple CSV to Excel converter.
import argparse
import asyncio
import glob
import os
import pandas as pd
# Command-line interface definition. The usage examples are embedded in the
# description text, so the raw formatter is used to keep their line breaks.
description = '''Convert CSV files or an entire directory to XLSX.\n
Examples:
- Convert a single file to a directory
$ python main.py /path/to/file.csv /path/to/output/
- Convert a single file with custom name
$ python main.py /path/to/file.csv /path/to/output/custom.xlsx
- Convert an entire directory and sub-directories to a directory
$ python main.py /path/to/dir/ /path/to/output --recurse
- The --index option indexes data
$ python main.py /path/to/file.csv /path/to/output --index
'''
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Positional arguments: where to read from and where to write to.
parser.add_argument(
    'input_path',
    type=str,
    help='Path to a CSV file or a directory containing CSV files',
)
parser.add_argument(
    'output_path',
    type=str,
    help='Path to save the XLSX files. Can be a directory or a path ending with .xlsx',
)

# Paired on/off switches; each pair writes to the same destination attribute.
parser.add_argument(
    '--index',
    dest='index',
    action='store_true',
    help="First column will be the index",
)
parser.add_argument(
    '--no-index',
    dest='index',
    action='store_false',
    help="No index output file",
)
parser.add_argument(
    '--recurse',
    dest='recurse',
    action='store_true',
    help="If input_path is a DIR, all CSV files in sub-directories will be converted",
)
parser.add_argument(
    '--no-recurse',
    dest='recurse',
    action='store_false',
    help="No recurse input_path",
)

# Both switches are off unless explicitly enabled on the command line.
parser.set_defaults(index=False, recurse=False)

# Parsed at module load; the resulting namespace is handed to main() by the
# script entry point.
args = parser.parse_args()
########
async def csv_to_excel(csv_path: str, output_path: str, index=False):
    """Convert a single CSV file into an XLSX workbook.

    Args:
        csv_path: Path of the CSV file to read.
        output_path: Destination of the workbook. When it is an existing
            directory, the workbook is written inside it and named after the
            CSV file with its final extension replaced by ``.xlsx``. Any
            other value is used verbatim as the output file path.
        index: Forwarded to ``DataFrame.to_excel``; when true the DataFrame
            index is written as the first column.

    Note:
        ``pd.read_csv`` and ``DataFrame.to_excel`` are called synchronously,
        so this coroutine does not yield to the event loop while converting.
    """
    if os.path.isdir(output_path):
        # splitext removes only the last extension, so "report.2021.csv"
        # becomes "report.2021.xlsx" rather than being cut at the first dot.
        stem, _ = os.path.splitext(os.path.basename(csv_path))
        output_path = os.path.join(output_path, f"{stem}.xlsx")
    print(f'after conversion, output_path is {output_path}')
    frame = pd.read_csv(csv_path)
    frame.to_excel(output_path, index=index)
async def convert_dir(input_dir: str, output_dir: str, recurse: bool=False, index: bool=False):
    """Convert every ``*.csv`` file found in a directory to XLSX.

    Args:
        input_dir: Directory to search for CSV files.
        output_dir: Destination passed to ``csv_to_excel`` for every file.
        recurse: When true, sub-directories of ``input_dir`` are searched as
            well (``**`` pattern with ``recursive=True``).
        index: Forwarded to ``csv_to_excel`` for every file.

    Note:
        The same ``output_dir`` is used for every match, so in recursive mode
        files that share a name in different sub-directories are presumably
        written to the same output path; verify against ``csv_to_excel``.
    """
    # Escape the directory so glob metacharacters in its name ("[", "*", "?")
    # are matched literally instead of being interpreted as a pattern.
    base = glob.escape(input_dir)
    pattern = f"{base}/**/*.csv" if recurse else f"{base}/*.csv"
    filenames = glob.glob(pattern, recursive=recurse)
    tasks = [csv_to_excel(f, output_dir, index) for f in filenames]
    print('disparando tasks de conversao...')
    await asyncio.gather(*tasks)
async def main(args: argparse.Namespace):
    """Validate the parsed arguments and dispatch the conversion.

    Args:
        args: Namespace providing ``input_path``, ``output_path``,
            ``recurse`` and ``index``.

    Raises:
        ValueError: If the input is a directory but the output is not, if the
            input is neither a directory nor an existing file, or if the
            output path is an already existing file.
    """
    if os.path.isdir(args.input_path):
        if not os.path.isdir(args.output_path):
            raise ValueError("When input path is a directory, output path MUST be a directory")
        # Forward the index flag as well; without it --index is silently
        # ignored whenever a whole directory is converted.
        await convert_dir(args.input_path, args.output_path, args.recurse, args.index)
        return

    if not os.path.isfile(args.input_path):
        raise ValueError(f"Input file [{args.input_path}] is not valid")
    # Refuse to overwrite an existing output file.
    if os.path.isfile(args.output_path):
        raise ValueError(f"Output file [{args.output_path}] already exists")
    await csv_to_excel(args.input_path, args.output_path, args.index)
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion with the
    # namespace parsed at module level.
    entry_coroutine = main(args)
    asyncio.run(entry_coroutine)
et-xmlfile==1.1.0
numpy==1.20.2
openpyxl==3.0.7
pandas==1.2.4
python-dateutil==2.8.1
pytz==2021.1
six==1.15.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment