Skip to content

Instantly share code, notes, and snippets.

@drelatgithub
Created August 28, 2018 18:57
Show Gist options
  • Save drelatgithub/cf7ac442eff9d777dece276ebbc7458d to your computer and use it in GitHub Desktop.
Save drelatgithub/cf7ac442eff9d777dece276ebbc7458d to your computer and use it in GitHub Desktop.
###############################################################################
#
# csv2xlsx.py
# version 0.1
#
# Copy all data of .csv files in a directory to different sheets of a single
# .xlsx file.
#
# 3rd party libs: openpyxl, pandas
###############################################################################
import argparse # command line input
from openpyxl import Workbook # for xlsx support
import os # dealing with path
import pandas as pd # read dataframe from csv and write
import re # for re.split(...)
from types import SimpleNamespace
# Config
conf = SimpleNamespace(
input_dir ="",
output_name ="",
auto_yes =False
)
class Util(object):
@staticmethod
def prompt_yes_no():
print(" [yes/no]:", end=" ", flush=True)
judge = lambda x: True if x in ("yes", "y") else (False if x in ("no", "n") else None)
judge_result = judge(input().lower())
while judge_result is None:
print("Please answer yes or no:", end=" ", flush=True)
judge_result = judge(input().lower())
else:
return judge_result
def work():
# Check file existence
if os.path.isfile(conf.output_name):
print("File {} exists.".format(conf.output_name), end=" ")
if conf.auto_yes:
print("Overwriting...")
else:
print("Overwrite?", end="")
if not Util.prompt_yes_no():
return
# Generate writer
wb = Workbook()
writer = pd.ExcelWriter(conf.output_name, engine="openpyxl")
writer.book = wb
# Recursively search for files and write them to excel spreadsheet
for root, dirs, files in os.walk(conf.input_dir):
for each_file in files:
if each_file.endswith(".csv"):
file_name = os.path.join(root, each_file)
# Resolving name conflicts.
# TODO:
# - Trim special characters
# - Trim too long names
# Other methods like including file path might be used, but beware that
# - sheet name cannot contain \/*[]?:
# - sheet name cannot exceed 31 chars in some implementations
i = 0
sheet_name = '.'.join(each_file.split('.')[:-1])
while sheet_name in wb.sheetnames:
i += 1
sheet_name = '.'.join(['.'.join(each_file.split('.')[:-1]), str(i)])
print("Working on sheet", sheet_name, "from file", file_name)
df = pd.read_csv(file_name)
df.to_excel(writer, sheet_name, index=False)
# Save spreadsheet
if len(wb.sheetnames) > 1:
wb.remove(wb.active) # Remove default sheet
writer.save()
else:
print("No sheet added.")
writer.close()
if __name__ == "__main__":
# Parse the arguments
parser = argparse.ArgumentParser()
parser.add_argument("input_dir", type=str, help="The input directory")
parser.add_argument("output_name", type=str, help="The output file name with path")
parser.add_argument("-y", action="store_true", help="Automatically answer yes to prompts")
args = parser.parse_args()
conf.auto_yes = args.y
conf.input_dir = args.input_dir
conf.output_name = args.output_name
# Do the work
work()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment