Skip to content

Instantly share code, notes, and snippets.

@harkabeeparolus
Last active January 15, 2024 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harkabeeparolus/aae10da864b20df15d406e453caf00ba to your computer and use it in GitHub Desktop.
Save harkabeeparolus/aae10da864b20df15d406e453caf00ba to your computer and use it in GitHub Desktop.
Try to autodetect and display CSV or TSV data in a terminal. Fall back to plain text if auto detection fails.
#! /usr/bin/env python3
"""Try to autodetect, parse and display CSV, but fallback to plain text otherwise."""
# Copyright 2024 Fredrik Mellström <https://github.com/harkabeeparolus>
# MIT License; SPDX short identifier: MIT
import argparse
import csv
import io
import itertools as it
import sys
from contextlib import suppress
from pathlib import Path
def main():
    """Command-line entry point.

    Reads text from the file given on the command line (or from standard
    input), tries to interpret it as delimited data, and prints it either as
    an aligned table or as standard CSV.  If no CSV dialect can be found, the
    input is echoed back unchanged.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "-a",
        "--any-delimiter",
        action="store_true",
        help="consider any character as a delimiter (default: ,;: and <tab>)",
    )
    cli.add_argument(
        "-c", "--out-csv", action="store_true", help="output well-formed, standard CSV"
    )
    cli.add_argument("input_file", nargs="?")
    options = cli.parse_args()
    source = options.input_file

    # With an interactive stdin and no file there is nothing to read.
    if sys.stdin.isatty() and not source:
        cli.error("No input file specified.")
    if source and not Path(source).is_file():
        cli.error(f"Invalid file: {source}")

    try:
        if source:
            # newline="" leaves line endings untouched for the csv module.
            with open(source, newline="") as handle:
                raw_lines = tuple(handle)
        else:
            raw_lines = tuple(sys.stdin)
    except ValueError:
        # UnicodeDecodeError is a subclass of ValueError.
        sys.exit("Text decoding error, probably binary data.")

    rows = read_csv(raw_lines, any_delimiter=options.any_delimiter)
    if not rows:
        # Not recognisable as CSV (or empty): pass the text through verbatim.
        print("".join(raw_lines), end="")
        return
    if options.out_csv:
        write_csv(rows)
    else:
        print_table(rows)
def read_csv(input_lines, any_delimiter=False):
    """Try to parse *input_lines* as delimited text and return its rows.

    The dialect is guessed with ``csv.Sniffer``.  If sniffing fails, simple
    fallbacks are tried in order: every line contains a tab (tab-separated),
    every line contains a comma (comma-separated), or every non-blank line is
    a single whitespace-free token (treated as one-column data).

    Args:
        input_lines: Sequence of text lines.  It is iterated several times,
            so it must not be a one-shot iterator.
        any_delimiter: When true, the sniffer may pick any character as the
            delimiter instead of only comma, tab, semicolon and colon.

    Returns:
        A list of rows (lists of strings).  The list is empty when the input
        is empty or when parsing raised ``csv.Error``.  ``None`` is returned
        when no dialect could be determined.
    """
    if not input_lines:
        # Nothing to sniff; avoid relying on all() being vacuously true below.
        return []

    delimiters = None if any_delimiter else ",\t;:"
    dialect = None
    with suppress(csv.Error):
        dialect = csv.Sniffer().sniff("".join(input_lines), delimiters=delimiters)

    if not dialect:
        if all("\t" in line for line in input_lines):
            dialect = "excel-tab"
        elif all("," in line for line in input_lines):
            dialect = "excel"
        elif {len(line.split()) for line in input_lines if line.strip()} == {1}:
            # Lines keep their line endings, so blank lines must be detected
            # with strip(); a bare truthiness test never filters them out.
            dialect = "excel-tab"
        else:
            return None

    output = []
    with suppress(csv.Error):
        output = list(csv.reader(input_lines, dialect=dialect))
    return output
def write_csv(input_lines, pad=""):
    """Write rows to standard output as standard CSV, padding ragged rows.

    Every row is extended with *pad* up to the length of the longest row, so
    that all emitted records have the same number of fields.

    Args:
        input_lines: Sequence of rows, each a sequence of fields.  It is
            iterated twice, so it must not be a one-shot iterator.
        pad: Value appended to rows that are shorter than the widest row.
    """
    # default=0 keeps an empty row sequence from raising ValueError in max().
    width = max((len(row) for row in input_lines), default=0)

    # Build the CSV text in memory with newline="" so the writer's own line
    # terminators are kept as-is in the buffer.
    buffer = io.StringIO(newline="")
    writer = csv.writer(buffer)
    writer.writerows(
        it.islice(it.chain(row, it.repeat(pad)), width) for row in input_lines
    )
    sys.stdout.write(buffer.getvalue())
def print_table(output, separator=" │ "):
    """Print rows as a table with left-aligned, equally wide columns.

    Args:
        output: Sequence of rows, each a sequence of string fields.  It is
            iterated twice, so it must not be a one-shot iterator.
        separator: Text placed between adjacent columns.
    """
    # Track a width for every column index that occurs, including columns
    # whose fields are all empty; otherwise the lookup below would fail for
    # such a column (e.g. data with a trailing delimiter on every line).
    widths = []
    for row in output:
        widths.extend([0] * (len(row) - len(widths)))
        for index, field in enumerate(row):
            widths[index] = max(widths[index], len(field))

    for row in output:
        # ljust pads with spaces like the default string format alignment,
        # and is well-defined for a width of zero.
        print(
            separator.join(
                field.ljust(widths[index]) for index, field in enumerate(row)
            )
        )
if __name__ == "__main__":
    # Run the CLI only when executed as a script; main()'s return value
    # becomes the process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment