drelatgithub/csv2xlsx.py

## csv2xlsx.py
###############################################################################
#
#  csv2xlsx.py
#  version 0.1
#
#  Copy all data of .csv files in a directory to different sheets of a single
#  .xlsx file.
#
#  3rd party libs: openpyxl, pandas
###############################################################################

import argparse                 # command line input
from openpyxl import Workbook   # for xlsx support
import os                       # dealing with path
import pandas as pd             # read dataframe from csv and write
import re                       # for re.split(...)
from types import SimpleNamespace

# Config
conf = SimpleNamespace(
    input_dir   ="",
    output_name ="",
    auto_yes    =False
)

class Util(object):
    @staticmethod
    def prompt_yes_no():
        print(" [yes/no]:", end=" ", flush=True)
        judge = lambda x: True if x in ("yes", "y") else (False if x in ("no", "n") else None)
        judge_result = judge(input().lower())
        while judge_result is None:
            print("Please answer yes or no:", end=" ", flush=True)
            judge_result = judge(input().lower())
        else:
            return judge_result

def work():
    # Check file existence
    if os.path.isfile(conf.output_name):
        print("File {} exists.".format(conf.output_name), end=" ")
        if conf.auto_yes:
            print("Overwriting...")
        else:
            print("Overwrite?", end="")
            if not Util.prompt_yes_no():
                return

    # Generate writer
    wb = Workbook()
    writer = pd.ExcelWriter(conf.output_name, engine="openpyxl")
    writer.book = wb

    # Recursively search for files and write them to excel spreadsheet
    for root, dirs, files in os.walk(conf.input_dir):
        for each_file in files:
            if each_file.endswith(".csv"):
                file_name = os.path.join(root, each_file)

                # Resolving name conflicts.
                # TODO:
                #   - Trim special characters
                #   - Trim too long names
                # Other methods like including file path might be used, but beware that
                #   - sheet name cannot contain \/*[]?:
                #   - sheet name cannot exceed 31 chars in some implementations
                i = 0
                sheet_name = '.'.join(each_file.split('.')[:-1])
                while sheet_name in wb.sheetnames:
                    i += 1
                    sheet_name = '.'.join(['.'.join(each_file.split('.')[:-1]), str(i)])
                print("Working on sheet", sheet_name, "from file", file_name)
                df = pd.read_csv(file_name)
                df.to_excel(writer, sheet_name, index=False)

    # Save spreadsheet
    if len(wb.sheetnames) > 1:
        wb.remove(wb.active) # Remove default sheet
        writer.save()
    else:
        print("No sheet added.")
    writer.close()

if __name__ == "__main__":

    # Parse the arguments
    parser = argparse.ArgumentParser()

    parser.add_argument("input_dir", type=str, help="The input directory")
    parser.add_argument("output_name", type=str, help="The output file name with path")
    parser.add_argument("-y", action="store_true", help="Automatically answer yes to prompts")

    args = parser.parse_args()

    conf.auto_yes = args.y
    conf.input_dir = args.input_dir
    conf.output_name = args.output_name

    # Do the work
    work()
	###############################################################################
	#
	# csv2xlsx.py
	# version 0.1
	#
	# Copy all data of .csv files in a directory to different sheets of a single
	# .xlsx file.
	#
	# 3rd party libs: openpyxl, pandas
	###############################################################################

	import argparse # command line input
	from openpyxl import Workbook # for xlsx support
	import os # dealing with path
	import pandas as pd # read dataframe from csv and write
	import re # for re.split(...)
	from types import SimpleNamespace

	# Config
	conf = SimpleNamespace(
	input_dir ="",
	output_name ="",
	auto_yes =False
	)

	class Util(object):
	@staticmethod
	def prompt_yes_no():
	print(" [yes/no]:", end=" ", flush=True)
	judge = lambda x: True if x in ("yes", "y") else (False if x in ("no", "n") else None)
	judge_result = judge(input().lower())
	while judge_result is None:
	print("Please answer yes or no:", end=" ", flush=True)
	judge_result = judge(input().lower())
	else:
	return judge_result

	def work():
	# Check file existence
	if os.path.isfile(conf.output_name):
	print("File {} exists.".format(conf.output_name), end=" ")
	if conf.auto_yes:
	print("Overwriting...")
	else:
	print("Overwrite?", end="")
	if not Util.prompt_yes_no():
	return

	# Generate writer
	wb = Workbook()
	writer = pd.ExcelWriter(conf.output_name, engine="openpyxl")
	writer.book = wb

	# Recursively search for files and write them to excel spreadsheet
	for root, dirs, files in os.walk(conf.input_dir):
	for each_file in files:
	if each_file.endswith(".csv"):
	file_name = os.path.join(root, each_file)

	# Resolving name conflicts.
	# TODO:
	# - Trim special characters
	# - Trim too long names
	# Other methods like including file path might be used, but beware that
	# - sheet name cannot contain \/*[]?:
	# - sheet name cannot exceed 31 chars in some implementations
	i = 0
	sheet_name = '.'.join(each_file.split('.')[:-1])
	while sheet_name in wb.sheetnames:
	i += 1
	sheet_name = '.'.join(['.'.join(each_file.split('.')[:-1]), str(i)])
	print("Working on sheet", sheet_name, "from file", file_name)
	df = pd.read_csv(file_name)
	df.to_excel(writer, sheet_name, index=False)

	# Save spreadsheet
	if len(wb.sheetnames) > 1:
	wb.remove(wb.active) # Remove default sheet
	writer.save()
	else:
	print("No sheet added.")
	writer.close()

	if __name__ == "__main__":

	# Parse the arguments
	parser = argparse.ArgumentParser()

	parser.add_argument("input_dir", type=str, help="The input directory")
	parser.add_argument("output_name", type=str, help="The output file name with path")
	parser.add_argument("-y", action="store_true", help="Automatically answer yes to prompts")

	args = parser.parse_args()

	conf.auto_yes = args.y
	conf.input_dir = args.input_dir
	conf.output_name = args.output_name

	# Do the work
	work()