charmoniumQ/csvcut

## csvcut
#!/usr/bin/env python3
import click
import sys
import csv

# Release number; substituted for '{version}' in the banner below.
version = 0.1

# Banner template printed for --version. The leading and trailing newlines
# are removed by print_version() before output.
version_info = (
    '\n'
    'csvcut v{version}\n'
    'by Sam Grayson\n'
    '(backwards-c) 2017 GPLv3\n'
)

@click.command()
@click.option('--fields', '-f', type=str,
              help='select only these (numbered) fields in this order')
@click.option('--kfields', '-k', type=str,
              help='select only these keyword fields in this order')
# A bare switch, like cut's --complement; without is_flag click would demand
# an explicit value ("--complement true").
@click.option('--complement', is_flag=True, default=False,
              help='inverts the selection')
# --help implicit option
@click.option('--version', 'version', flag_value=True, default=False, type=bool,
              help='prints version and exits')
@click.argument('file', default=sys.stdin, type=click.File('r'))
def main(fields, kfields, complement, version, file):
    '''CSV-aware cut -- outputs chosen columns of its input

This script is a thin wrapper around Python's CSV (Comma-Separated Values)
library with a cut-like interface. It is capable of handling escaped commas and
newlines.

With no FILE, or when FILE is -, read standard input.

If neither keyword fields nor numbered fields are supplied, print the header.

For keyword fields with -k, the first row is interpreted as the header.

For numbered fields with -f, this accepts comma separated field-ranges of the
following form where N, and M are integers (like the cut syntax):

\b
N      N'th field
N-     from N'th field
N-M    from N'th to M'th (included) field
-M     from first to M'th (included) field

Fields are numbered from 0. Fields out of the range of the row will extract the
empty string. Use / in place of a minus-sign to input a negative number.
Negative indices are interpreted as counting from the end of the line.
'''

    if version:
        return print_version()
    if fields and kfields:
        # UsageError makes click print a usage message and exit non-zero
        # instead of dumping a traceback.
        raise click.UsageError(
            'Cannot provide both numbered fields and keyword fields')

    if fields:
        status = print_fields(file, fields, complement)
    elif kfields:
        status = print_kfields(file, kfields, complement)
    else:
        # no selection at all: just list the column names
        status = print_header(file)

    # click ignores a command's return value when run as a program, so a
    # failure status has to be turned into the process exit code explicitly.
    if status:
        sys.exit(status)
    return status

def str_to_indices(fields):
    '''Parse a -f field specification into a list of index selectors.

    `fields` is a comma-separated string whose items have one of the forms
    N, N-, -M or N-M. A '/' stands for a minus sign ('/1' means -1), because
    '-' already separates the two ends of a range.

    Returns a list with one entry per item, in input order: an int for a
    single field, or a (start, stop) tuple for a range. `stop` is None for
    the open-ended form N-, and `start` is 0 for the form -M.

    Raises RuntimeError if an item is not in one of the accepted forms or
    contains something that is not an integer.
    '''
    message = 'Cannot parse {field}\nShould be M, M-, -M, or N-M'
    slices = []
    for field in fields.split(','):
        try:
            if '-' in field:
                span = [part.replace('/', '-') for part in field.split('-')]
                if len(span) != 2 or not any(span):
                    # more than one dash, or a lone '-' with neither end given
                    raise ValueError(field)
                start, stop = span
                slices.append((int(start) if start else 0,
                               int(stop) if stop else None))
            else:
                slices.append(int(field.replace('/', '-')))
        except ValueError as err:
            # int() reports bad digits as ValueError; surface every parse
            # failure as the one RuntimeError that callers handle.
            raise RuntimeError(message.format(field=field)) from err
    return slices

def print_fields(file, fields, complement):
    '''Write the numbered fields selected by `fields` from `file` to stdout.

    `fields` is the raw -f specification string and `complement` asks for
    the inverted selection. Returns 1 after reporting on stderr that the
    specification could not be parsed; otherwise returns whatever
    print_fields_ returns.
    '''
    try:
        indices = str_to_indices(fields)
    except (RuntimeError, ValueError) as e:
        # ValueError covers int() rejecting a non-numeric item, should the
        # parser let that escape. The message goes to stderr so it never
        # mixes with CSV output on stdout.
        print(str(e), file=sys.stderr)
        return 1
    return print_fields_(file, sys.stdout, indices, complement)

def print_fields_(infile, outfile, indices, complement):
    '''Copy the selected columns of every CSV row from `infile` to `outfile`.

    `indices` is a list of selectors: ints and (start, stop) tuples, which
    may be negative or open-ended (stop of None). The selectors are resolved
    against each row's own length, so rows of different widths are handled
    independently. When `complement` is true, the columns that are *not*
    selected are written instead, in ascending position order. Positions
    outside a row produce the empty string. Returns 0.
    '''
    csv_in = csv.reader(infile)
    csv_out = csv.writer(outfile)
    for line in csv_in:
        # Resolve into a fresh name: overwriting `indices` would make later
        # rows reuse positions computed for the first row's length.
        row_indices = indices_to_positive_indices(indices, len(line))
        if complement:
            # The boolean parameter `complement` hides the module-level
            # function of the same name, so the inversion is done inline.
            chosen = set(row_indices)
            row_indices = [position for position in range(len(line))
                           if position not in chosen]
        csv_out.writerow(project_arr(line, row_indices, ''))
    return 0

def print_kfields(file, kfields, complement):
    '''Write the named columns listed in `kfields` from `file` to stdout.

    `kfields` is the raw -k value: column names separated by commas.
    `complement` asks for the inverted selection. Returns the status
    reported by print_kfields_.
    '''
    return print_kfields_(file, sys.stdout, kfields.split(','), complement)

def print_kfields_(infile, outfile, fields, complement):
    '''Copy the named columns of every data row from `infile` to `outfile`.

    The first row of `infile` is consumed as the header that names the
    columns; it is not written to `outfile`. `fields` lists the wanted
    column names in output order. When `complement` is true, every header
    column *not* in `fields` is written instead, in header order. A
    requested name that a row lacks is written as the empty string.
    Returns 0.
    '''
    csv_in = csv.DictReader(infile)
    if complement:
        excluded = set(fields)
        # fieldnames is None when the input is empty
        fields = [name for name in (csv_in.fieldnames or [])
                  if name not in excluded]
    # Build the writer only once the column list is final; building it from
    # the pre-complement list makes writerow reject every complemented row.
    # restval/extrasaction let the writer do the projection itself: missing
    # names become '', unselected columns are dropped.
    csv_out = csv.DictWriter(outfile, fields, restval='',
                             extrasaction='ignore')
    for line in csv_in:
        csv_out.writerow(line)
    return 0

def print_header(file):
    '''Print the column names from the first row of `file`, one per line.

    Nothing is printed when `file` is empty. Returns 0.
    '''
    csv_in = csv.DictReader(file)
    names = csv_in.fieldnames
    # fieldnames is None (not an empty list) when there is no first row
    if names is not None:
        print('\n'.join(names))
    return 0

def print_version():
    '''Print the version banner with its surrounding whitespace removed.'''
    # The template's '{version}' placeholder is filled from module globals.
    banner = version_info.format(**globals())
    print(banner.strip())

def indices_to_positive_indices(indices, length):
    '''Expand selectors into a flat list of positions for a row of `length`.

    Each entry of `indices` is an int or a (start, stop) tuple. A negative
    value has `length` added to it once. A `stop` of None means the last
    position (length - 1). Ranges include both ends. Results are not
    clamped to the row, so they may still lie outside it.
    '''
    def absolute(position):
        # shift a from-the-end position by the row length, once
        return position + length if position < 0 else position

    resolved = []
    for selector in indices:
        if not isinstance(selector, tuple):
            resolved.append(absolute(selector))
            continue
        first, last = selector
        last = absolute(length - 1 if last is None else last)
        resolved.extend(range(absolute(first), last + 1))  # inclusive, like unix cut
    return resolved

def complement(indices, length):
    '''Return the positions in range(length) that are absent from `indices`.

    The result is in ascending order. Entries of `indices` outside
    range(length) have no effect.
    '''
    excluded = set(indices)  # built once, not once per candidate position
    return [index for index in range(length) if index not in excluded]

def project(d, keys, default=None):
    '''Return a dict mapping each of `keys` to its value in `d`.

    A key that `d` does not contain is mapped to `default`.
    '''
    picked = {}
    for key in keys:
        if key in d:
            picked[key] = d[key]
        else:
            picked[key] = default
    return picked

def project_arr(arr, indices, default=None):
    '''Return the elements of `arr` at the given `indices`, in that order.

    An index below 0 or past the end of `arr` contributes `default`;
    negative indices are not treated as counting from the end.
    '''
    size = len(arr)
    picked = []
    for index in indices:
        in_bounds = 0 <= index < size
        picked.append(arr[index] if in_bounds else default)
    return picked

def sniff_dialect(file):
    '''Guess the CSV dialect of `file` from its first 1024 characters.

    The file is rewound to the start afterwards, so it must support seek().
    Returns the dialect found by csv.Sniffer.
    '''
    sample = file.read(1024)
    detected = csv.Sniffer().sniff(sample)
    file.seek(0)  # let the caller read the data from the beginning
    return detected

# Run the command-line interface only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	import click
	import sys
	import csv

	# Release number; substituted for '{version}' in the banner below.
	version = 0.1

	# Banner template printed for --version. Written with explicit newlines
	# so the listing's leading tab cannot leak into the banner text.
	version_info = (
	    '\n'
	    'csvcut v{version}\n'
	    'by Sam Grayson\n'
	    '(backwards-c) 2017 GPLv3\n'
	)

	@click.command()
	@click.option('--fields', '-f', type=str,
	              help='select only these (numbered) fields in this order')
	@click.option('--kfields', '-k', type=str,
	              help='select only these keyword fields in this order')
	# A bare switch, like cut's --complement; without is_flag click would
	# demand an explicit value ("--complement true").
	@click.option('--complement', is_flag=True, default=False,
	              help='inverts the selection')
	# --help implicit option
	@click.option('--version', 'version', flag_value=True, default=False, type=bool,
	              help='prints version and exits')
	@click.argument('file', default=sys.stdin, type=click.File('r'))
	def main(fields, kfields, complement, version, file):
	    '''CSV-aware cut -- outputs chosen columns of its input

	This script is a thin wrapper around Python's CSV (Comma-Separated Values)
	library with a cut-like interface. It is capable of handling escaped commas and
	newlines.

	With no FILE, or when FILE is -, read standard input.

	If neither keyword fields nor numbered fields are supplied, print the header.

	For keyword fields with -k, the first row is interpreted as the header.

	For numbered fields with -f, this accepts comma separated field-ranges of the
	following form where N, and M are integers (like the cut syntax):

	\b
	N      N'th field
	N-     from N'th field
	N-M    from N'th to M'th (included) field
	-M     from first to M'th (included) field

	Fields are numbered from 0. Fields out of the range of the row will extract the
	empty string. Use / in place of a minus-sign to input a negative number.
	Negative indices are interpreted as counting from the end of the line.
	'''

	    if version:
	        return print_version()
	    if fields and kfields:
	        # UsageError makes click print a usage message and exit non-zero
	        # instead of dumping a traceback.
	        raise click.UsageError(
	            'Cannot provide both numbered fields and keyword fields')

	    if fields:
	        status = print_fields(file, fields, complement)
	    elif kfields:
	        status = print_kfields(file, kfields, complement)
	    else:
	        # no selection at all: just list the column names
	        status = print_header(file)

	    # click ignores a command's return value when run as a program, so a
	    # failure status has to be turned into the process exit code explicitly.
	    if status:
	        sys.exit(status)
	    return status

	def str_to_indices(fields):
	    '''Parse a -f field specification into a list of index selectors.

	    `fields` is a comma-separated string whose items have one of the forms
	    N, N-, -M or N-M. A '/' stands for a minus sign ('/1' means -1), because
	    '-' already separates the two ends of a range.

	    Returns a list with one entry per item, in input order: an int for a
	    single field, or a (start, stop) tuple for a range. `stop` is None for
	    the open-ended form N-, and `start` is 0 for the form -M.

	    Raises RuntimeError if an item is not in one of the accepted forms or
	    contains something that is not an integer.
	    '''
	    message = 'Cannot parse {field}\nShould be M, M-, -M, or N-M'
	    slices = []
	    for field in fields.split(','):
	        try:
	            if '-' in field:
	                span = [part.replace('/', '-') for part in field.split('-')]
	                if len(span) != 2 or not any(span):
	                    # more than one dash, or a lone '-' with neither end given
	                    raise ValueError(field)
	                start, stop = span
	                slices.append((int(start) if start else 0,
	                               int(stop) if stop else None))
	            else:
	                slices.append(int(field.replace('/', '-')))
	        except ValueError as err:
	            # int() reports bad digits as ValueError; surface every parse
	            # failure as the one RuntimeError that callers handle.
	            raise RuntimeError(message.format(field=field)) from err
	    return slices

	def print_fields(file, fields, complement):
	    '''Write the numbered fields selected by `fields` from `file` to stdout.

	    `fields` is the raw -f specification string and `complement` asks for
	    the inverted selection. Returns 1 after reporting on stderr that the
	    specification could not be parsed; otherwise returns whatever
	    print_fields_ returns.
	    '''
	    try:
	        indices = str_to_indices(fields)
	    except (RuntimeError, ValueError) as e:
	        # ValueError covers int() rejecting a non-numeric item, should the
	        # parser let that escape. The message goes to stderr so it never
	        # mixes with CSV output on stdout.
	        print(str(e), file=sys.stderr)
	        return 1
	    return print_fields_(file, sys.stdout, indices, complement)

	def print_fields_(infile, outfile, indices, complement):
	    '''Copy the selected columns of every CSV row from `infile` to `outfile`.

	    `indices` is a list of selectors: ints and (start, stop) tuples, which
	    may be negative or open-ended (stop of None). The selectors are resolved
	    against each row's own length, so rows of different widths are handled
	    independently. When `complement` is true, the columns that are *not*
	    selected are written instead, in ascending position order. Positions
	    outside a row produce the empty string. Returns 0.
	    '''
	    csv_in = csv.reader(infile)
	    csv_out = csv.writer(outfile)
	    for line in csv_in:
	        # Resolve into a fresh name: overwriting `indices` would make later
	        # rows reuse positions computed for the first row's length.
	        row_indices = indices_to_positive_indices(indices, len(line))
	        if complement:
	            # The boolean parameter `complement` hides the module-level
	            # function of the same name, so the inversion is done inline.
	            chosen = set(row_indices)
	            row_indices = [position for position in range(len(line))
	                           if position not in chosen]
	        csv_out.writerow(project_arr(line, row_indices, ''))
	    return 0

	def print_kfields(file, kfields, complement):
	    '''Write the named columns listed in `kfields` from `file` to stdout.

	    `kfields` is the raw -k value: column names separated by commas.
	    `complement` asks for the inverted selection. Returns the status
	    reported by print_kfields_.
	    '''
	    return print_kfields_(file, sys.stdout, kfields.split(','), complement)

	def print_kfields_(infile, outfile, fields, complement):
	    '''Copy the named columns of every data row from `infile` to `outfile`.

	    The first row of `infile` is consumed as the header that names the
	    columns; it is not written to `outfile`. `fields` lists the wanted
	    column names in output order. When `complement` is true, every header
	    column *not* in `fields` is written instead, in header order. A
	    requested name that a row lacks is written as the empty string.
	    Returns 0.
	    '''
	    csv_in = csv.DictReader(infile)
	    if complement:
	        excluded = set(fields)
	        # fieldnames is None when the input is empty
	        fields = [name for name in (csv_in.fieldnames or [])
	                  if name not in excluded]
	    # Build the writer only once the column list is final; building it from
	    # the pre-complement list makes writerow reject every complemented row.
	    # restval/extrasaction let the writer do the projection itself: missing
	    # names become '', unselected columns are dropped.
	    csv_out = csv.DictWriter(outfile, fields, restval='',
	                             extrasaction='ignore')
	    for line in csv_in:
	        csv_out.writerow(line)
	    return 0

	def print_header(file):
	    '''Print the column names from the first row of `file`, one per line.

	    Nothing is printed when `file` is empty. Returns 0.
	    '''
	    csv_in = csv.DictReader(file)
	    names = csv_in.fieldnames
	    # fieldnames is None (not an empty list) when there is no first row
	    if names is not None:
	        print('\n'.join(names))
	    return 0

	def print_version():
	    '''Print the version banner with its surrounding whitespace removed.'''
	    # The template's '{version}' placeholder is filled from module globals.
	    print(version_info.format(**globals()).strip())

	def indices_to_positive_indices(indices, length):
	    '''Expand selectors into a flat list of positions for a row of `length`.

	    Each entry of `indices` is an int or a (start, stop) tuple. A negative
	    value has `length` added to it once. A `stop` of None means the last
	    position (length - 1). Ranges include both ends. Results are not
	    clamped to the row, so they may still lie outside it.
	    '''
	    r = []
	    for index in indices:
	        if isinstance(index, tuple):
	            start, stop = index
	            if start < 0:
	                start += length
	            if stop is None:
	                stop = length - 1
	            if stop < 0:
	                stop += length
	            r.extend(range(start, stop + 1))  # inclusive, like unix cut
	        else:
	            if index < 0:
	                index += length
	            r.append(index)
	    return r

	def complement(indices, length):
	    '''Return the positions in range(length) that are absent from `indices`.

	    The result is in ascending order. Entries of `indices` outside
	    range(length) have no effect.
	    '''
	    excluded = set(indices)  # built once, not once per candidate position
	    return [index for index in range(length) if index not in excluded]

	def project(d, keys, default=None):
	    '''Return a dict mapping each of `keys` to its value in `d`.

	    A key that `d` does not contain is mapped to `default`.
	    '''
	    return {key: d[key] if key in d else default
	            for key in keys}

	def project_arr(arr, indices, default=None):
	    '''Return the elements of `arr` at the given `indices`, in that order.

	    An index below 0 or past the end of `arr` contributes `default`;
	    negative indices are not treated as counting from the end.
	    '''
	    return [arr[index] if 0 <= index < len(arr) else default
	            for index in indices]

	def sniff_dialect(file):
	    '''Guess the CSV dialect of `file` from its first 1024 characters.

	    The file is rewound to the start afterwards, so it must support seek().
	    Returns the dialect found by csv.Sniffer.
	    '''
	    dialect = csv.Sniffer().sniff(file.read(1024))
	    file.seek(0)  # let the caller read the data from the beginning
	    return dialect

	# Run the command-line interface only when executed as a script, not when
	# this module is imported.
	if __name__ == '__main__':
	    main()