Skip to content

Instantly share code, notes, and snippets.

@kkew3
Created July 11, 2019 15:32
Show Gist options
  • Save kkew3/4c83feb1099a52e9c17243a580ea3152 to your computer and use it in GitHub Desktop.
Save kkew3/4c83feb1099a52e9c17243a580ea3152 to your computer and use it in GitHub Desktop.
Small utility to select columns by name from a CSV file (assuming the first row contains the column titles) without ambiguity (`csvcut` from `csvkit` is currently ambiguous in this respect)
#!/usr/bin/env python3
import argparse
import logging
import sys
def make_parser():
    """Build the command-line argument parser for the column selector.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace exposes:
        ``delimiter`` (str, default ``','``), ``print_field`` (bool,
        default ``True``; cleared by ``-F``), ``strict`` (bool, default
        ``False``) and ``fields`` (list of the requested column names,
        possibly empty).
    """
    parser = argparse.ArgumentParser(
        description='Select column(s) of CSV file by name assuming the first '
                    'row of the CSV lists the column names. Currently the '
                    'script does not support quoting.')
    parser.add_argument('-d', '--delimiter', default=',',
                        help='field delimiter, default to comma')
    # store_false: giving -F turns the (default-on) title row OFF, so the
    # help text must describe suppression, not printing.
    parser.add_argument('-F', '--no-print-field', action='store_false',
                        dest='print_field',
                        help='do not print the selected title(s) as the '
                             'first row of output')
    parser.add_argument('-S', '--strict', action='store_true',
                        help='raise error if one of FIELDs does not exist')
    parser.add_argument('fields', nargs='*', metavar='FIELD',
                        help='FIELD to select')
    return parser
def _main():
    """Copy the requested columns of a delimited stream from stdin to stdout.

    The first input line is taken as the header row. Each FIELD given on
    the command line is looked up in that header by exact match; the
    columns found are emitted in the order the FIELDs were given.

    Returns:
        0 on success; 1 if stdin is empty (no header row); 4 if ``--strict``
        is set and a FIELD is missing from the header; 130 on
        ``KeyboardInterrupt``. On ``BrokenPipeError`` the function returns
        ``None``, which ``sys.exit`` treats as status 0.
    """
    args = make_parser().parse_args()
    logging.basicConfig(format='%(filename)s: %(levelname)s: %(message)s',
                        level=logging.DEBUG)
    try:
        infile = sys.stdin
        # An empty stream has no header; report it instead of letting
        # StopIteration escape as a traceback.
        header = next(infile, None)
        if header is None:
            logging.error('Input is empty; no header row to select from')
            return 1
        titles = header.rstrip('\n').split(args.delimiter)

        indices = []
        for field in args.fields:
            try:
                j = titles.index(field)
            except ValueError:
                if args.strict:
                    logging.error('Field "%s" does not exist; aborted',
                                  field)
                    return 4
                logging.warning('Field %s does not exist', field)
            else:
                indices.append(j)

        if args.print_field:
            print(args.delimiter.join(titles[j] for j in indices))

        if args.fields:
            for line in infile:
                tokens = line.rstrip('\n').split(args.delimiter)
                # Rows shorter than the header simply lack some columns;
                # keep the selected columns that do exist. Indices come
                # from list.index() and are therefore never negative.
                filtered_line = [tokens[j] for j in indices
                                 if j < len(tokens)]
                if filtered_line:
                    print(args.delimiter.join(filtered_line))
    except KeyboardInterrupt:
        return 130
    except BrokenPipeError:
        # Downstream reader went away (e.g. `| head`). Closing stderr
        # presumably silences the interpreter's shutdown-time complaint
        # about the broken stdout -- TODO confirm.
        sys.stderr.close()
    else:
        return 0
# Run the selector only when executed as a script; the value returned by
# _main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(_main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment