Created
August 28, 2018 18:57
-
-
Save drelatgithub/cf7ac442eff9d777dece276ebbc7458d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################### | |
# | |
# csv2xlsx.py | |
# version 0.1 | |
# | |
# Copy all data of .csv files in a directory to different sheets of a single | |
# .xlsx file. | |
# | |
# 3rd party libs: openpyxl, pandas | |
############################################################################### | |
import argparse # command line input | |
from openpyxl import Workbook # for xlsx support | |
import os # dealing with path | |
import pandas as pd # read dataframe from csv and write | |
import re # for re.split(...) | |
from types import SimpleNamespace | |
# Config | |
conf = SimpleNamespace( | |
input_dir ="", | |
output_name ="", | |
auto_yes =False | |
) | |
class Util(object): | |
@staticmethod | |
def prompt_yes_no(): | |
print(" [yes/no]:", end=" ", flush=True) | |
judge = lambda x: True if x in ("yes", "y") else (False if x in ("no", "n") else None) | |
judge_result = judge(input().lower()) | |
while judge_result is None: | |
print("Please answer yes or no:", end=" ", flush=True) | |
judge_result = judge(input().lower()) | |
else: | |
return judge_result | |
def work(): | |
# Check file existence | |
if os.path.isfile(conf.output_name): | |
print("File {} exists.".format(conf.output_name), end=" ") | |
if conf.auto_yes: | |
print("Overwriting...") | |
else: | |
print("Overwrite?", end="") | |
if not Util.prompt_yes_no(): | |
return | |
# Generate writer | |
wb = Workbook() | |
writer = pd.ExcelWriter(conf.output_name, engine="openpyxl") | |
writer.book = wb | |
# Recursively search for files and write them to excel spreadsheet | |
for root, dirs, files in os.walk(conf.input_dir): | |
for each_file in files: | |
if each_file.endswith(".csv"): | |
file_name = os.path.join(root, each_file) | |
# Resolving name conflicts. | |
# TODO: | |
# - Trim special characters | |
# - Trim too long names | |
# Other methods like including file path might be used, but beware that | |
# - sheet name cannot contain \/*[]?: | |
# - sheet name cannot exceed 31 chars in some implementations | |
i = 0 | |
sheet_name = '.'.join(each_file.split('.')[:-1]) | |
while sheet_name in wb.sheetnames: | |
i += 1 | |
sheet_name = '.'.join(['.'.join(each_file.split('.')[:-1]), str(i)]) | |
print("Working on sheet", sheet_name, "from file", file_name) | |
df = pd.read_csv(file_name) | |
df.to_excel(writer, sheet_name, index=False) | |
# Save spreadsheet | |
if len(wb.sheetnames) > 1: | |
wb.remove(wb.active) # Remove default sheet | |
writer.save() | |
else: | |
print("No sheet added.") | |
writer.close() | |
if __name__ == "__main__": | |
# Parse the arguments | |
parser = argparse.ArgumentParser() | |
parser.add_argument("input_dir", type=str, help="The input directory") | |
parser.add_argument("output_name", type=str, help="The output file name with path") | |
parser.add_argument("-y", action="store_true", help="Automatically answer yes to prompts") | |
args = parser.parse_args() | |
conf.auto_yes = args.y | |
conf.input_dir = args.input_dir | |
conf.output_name = args.output_name | |
# Do the work | |
work() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment