pgorczak/uniq.py

## uniq.py
#! /usr/bin/env python

import argparse
import csv
import itertools as itt
import sys

# Command-line tool: collapse every run of consecutive csv rows that share the
# same value in one named column down to the first row of that run. The header
# row is copied to the output unchanged.
parser = argparse.ArgumentParser(description=
'''Eliminate runs of identical values from csv data. Matching lines are
merged to the first occurrence.''')

parser.add_argument('name', type=str, nargs=1,
    help='name of the column to be processed')
parser.add_argument('infile', help='input file', nargs=1,
    type=argparse.FileType('r'))
parser.add_argument('outfile', help='output file', nargs='?',
    type=argparse.FileType('w'), default=sys.stdout)

args = parser.parse_args()

# nargs=1 makes argparse wrap each of these positionals in a one-element list.
name = args.name[0]
reader = csv.reader(args.infile[0])

# The first row holds the column names. The next() builtin is used instead of
# the reader.next() method because that method only exists on Python 2, while
# the builtin works on both Python 2 and Python 3.
try:
    names = next(reader)
except StopIteration:
    # parser.error() prints the usage text plus this message and exits.
    parser.error('input file is empty (no header row)')

# Translate the column name into a positional index. An unknown name is
# reported as a KeyError that lists the names that are available.
try:
    index = names.index(name)
except ValueError:
    raise KeyError(
        'Specified name "{}" not found in the file (names are: {})'.format(
            name, ', '.join(names)))

# itertools.groupby starts a new group each time the key changes, so it groups
# runs of adjacent rows rather than all rows with equal values. Every group
# holds at least one row, so next(run) always succeeds; it yields the first
# row of the run and the remaining rows are dropped.
unique = (next(run) for _, run in itt.groupby(reader, lambda row: row[index]))

writer = csv.writer(args.outfile)
# names in the first row of output csv
writer.writerow(names)
# rows are pulled lazily from the reader while they are written
writer.writerows(unique)
	#! /usr/bin/env python

	import argparse
	import csv
	import itertools as itt
	import sys

	# Command-line tool: collapse every run of consecutive csv rows that share
	# the same value in one named column down to the first row of that run.
	# The header row is copied to the output unchanged.
	parser = argparse.ArgumentParser(description=
	'''Eliminate runs of identical values from csv data. Matching lines are
	merged to the first occurrence.''')

	parser.add_argument('name', type=str, nargs=1,
	    help='name of the column to be processed')
	parser.add_argument('infile', help='input file', nargs=1,
	    type=argparse.FileType('r'))
	parser.add_argument('outfile', help='output file', nargs='?',
	    type=argparse.FileType('w'), default=sys.stdout)

	args = parser.parse_args()

	# nargs=1 makes argparse wrap each of these positionals in a one-element list.
	name = args.name[0]
	reader = csv.reader(args.infile[0])

	# The first row holds the column names. The next() builtin is used instead
	# of the reader.next() method because that method only exists on Python 2,
	# while the builtin works on both Python 2 and Python 3.
	try:
	    names = next(reader)
	except StopIteration:
	    # parser.error() prints the usage text plus this message and exits.
	    parser.error('input file is empty (no header row)')

	# Translate the column name into a positional index. An unknown name is
	# reported as a KeyError that lists the names that are available.
	try:
	    index = names.index(name)
	except ValueError:
	    raise KeyError(
	        'Specified name "{}" not found in the file (names are: {})'.format(
	            name, ', '.join(names)))

	# itertools.groupby starts a new group each time the key changes, so it
	# groups runs of adjacent rows rather than all rows with equal values.
	# Every group holds at least one row, so next(run) always succeeds; it
	# yields the first row of the run and the remaining rows are dropped.
	unique = (next(run) for _, run in itt.groupby(reader, lambda row: row[index]))

	writer = csv.writer(args.outfile)
	# names in the first row of output csv
	writer.writerow(names)
	# rows are pulled lazily from the reader while they are written
	writer.writerows(unique)