Skip to content

Instantly share code, notes, and snippets.

@nateify
Last active January 2, 2021 00:45
Show Gist options
  • Save nateify/25af2797ad8458aef188b15683003f45 to your computer and use it in GitHub Desktop.
Save nateify/25af2797ad8458aef188b15683003f45 to your computer and use it in GitHub Desktop.
Convert text-based tables to CSV. Designed specifically for tables generated by https://www.tablesgenerator.com/text_tables/. Multiline cells and vertically merged cells are not supported.
import csv
import re
import os
import sys
# Horizontal rule of an ASCII-art table, e.g. "+------+======+".
_ASCII_RULE_RE = re.compile(r"^\+[-=]{2,}.+\+$")
# Horizontal rule of a box-drawing table, e.g. "╔═════╦═════╗" or "├─────┼─────┤".
_BOX_RULE_RE = re.compile(r"^[╔╠╒╞├][─═]{2,}")
# Box-drawing characters that mark a column boundary on a horizontal rule.
_BOX_JUNCTIONS = frozenset("╔╦╗╠╬╣╒╤╕╞╪╡├┼┤")


def _default_outpath(inpath):
    """Return the absolute path of *inpath* with its extension replaced by ".csv"."""
    # Resolve to an absolute path *before* swapping the extension; joining the
    # absolute directory with the still-relative stem would duplicate any
    # directory component of a relative input such as "sub/in.txt".
    return os.path.splitext(os.path.abspath(inpath))[0] + ".csv"


def _parse_table_lines(lines):
    """Split the rows of a text table into lists of stripped cell strings.

    Lines are expected to alternate: every even-indexed line (0, 2, ...) is a
    horizontal rule and every odd-indexed line is a data row. The column
    boundaries of a data row are taken from the rule directly above it.

    Args:
        lines: The lines of the table, with or without trailing newlines.

    Returns:
        A list of rows, each row being a list of cell strings.

    Raises:
        ValueError: If an odd-indexed line is not preceded by a recognised
            horizontal rule.
    """
    rule_positions = {}
    rows = []

    for index, raw_line in enumerate(lines):
        line = raw_line.rstrip()

        # Remember where the column junctions sit on every horizontal rule.
        if _ASCII_RULE_RE.match(line):
            rule_positions[index] = [
                pos for pos, char in enumerate(line) if char == "+"
            ]
        elif _BOX_RULE_RE.match(line):
            rule_positions[index] = [
                pos for pos, char in enumerate(line) if char in _BOX_JUNCTIONS
            ]

        if index % 2 == 0:
            continue

        try:
            # Drop the last junction: it is the closing border, not a column start.
            splitpoints = rule_positions[index - 1][:-1]
        except KeyError:
            raise ValueError(
                "input file does not have expected table structure"
            ) from None

        # Blank out the closing border so it is stripped from the last cell.
        line = line[:-1] + " "
        raw_cells = [
            line[start:end]
            for start, end in zip(splitpoints, splitpoints[1:] + [None])
        ]
        # Each raw cell starts with its left border character; skip it.
        rows.append([cell[1:].strip() for cell in raw_cells])

    return rows


def table_to_csv(args):
    """Convert a text table file to CSV.

    Args:
        args: An argv-style sequence. ``args[1]`` is the input text file and
            the optional ``args[2]`` is the output CSV path. When the output
            path is omitted, the CSV is written next to the input file with
            the extension replaced by ".csv".

    Side effects:
        Prompts on stdin before overwriting an existing output file and
        exits the process with status 1 if the user declines or if the input
        does not have the expected table structure.
    """
    inpath = args[1]
    if len(args) < 3:
        outpath = _default_outpath(inpath)
    else:
        outpath = os.path.abspath(args[2])

    if os.path.isfile(outpath):
        print(outpath, "already exists")
        confirm = input("Overwrite? Y/N ")
        if confirm.strip().lower() != "y":
            sys.exit(1)

    # Undecodable bytes are dropped rather than aborting the conversion.
    with open(inpath, encoding="utf-8", errors="ignore") as file:
        tablelines_raw = file.readlines()

    # Parse fully before opening the output so a malformed input never
    # truncates an existing file.
    try:
        rows = _parse_table_lines(tablelines_raw)
    except ValueError as err:
        print(f"Error: {err}")
        sys.exit(1)

    with open(outpath, "w", newline="", encoding="utf-8") as f:
        csv.writer(f, dialect="excel").writerows(rows)
# Run the conversion only when executed as a script, so that importing this
# module (e.g. to reuse table_to_csv) neither parses the importer's argv nor
# terminates the importing process.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Error: not enough arguments")
        print("Usage: table_to_csv.py input.txt [output.csv]")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module for interactive use.
        sys.exit(1)
    table_to_csv(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment