Skip to content

Instantly share code, notes, and snippets.

@pgorczak
Created May 2, 2015 12:14
Show Gist options
  • Save pgorczak/6526b61ff58550c6b9ae to your computer and use it in GitHub Desktop.
Save pgorczak/6526b61ff58550c6b9ae to your computer and use it in GitHub Desktop.
Easy uniq for csv files
#! /usr/bin/env python
import argparse
import csv
import itertools as itt
import sys
def dedupe_runs(rows, index):
    """Yield the first row of every run of adjacent rows sharing a key.

    Args:
        rows: iterable of csv rows (indexable sequences).
        index: position of the column whose value identifies a run.

    Yields:
        The first row of each run. Only *adjacent* duplicates are merged
        (like ``uniq``); equal values separated by a different value are
        all kept.
    """
    for _, run in itt.groupby(rows, key=lambda row: row[index]):
        # groupby never produces an empty group, so next() cannot fail here.
        yield next(run)


def uniq_csv(name, infile, outfile):
    """Copy csv data from ``infile`` to ``outfile``, collapsing runs.

    The first row of ``infile`` is treated as the header and is always
    written to the output unchanged. Subsequent rows whose value in the
    column called ``name`` equals that of the preceding row are dropped.

    Args:
        name: header name of the column to be processed.
        infile: iterable of text lines (e.g. an open file) with csv data.
        outfile: writable text file object receiving the csv output.

    Raises:
        ValueError: if ``infile`` contains no header row.
        KeyError: if ``name`` is not one of the header names.
    """
    reader = csv.reader(infile)
    # The builtin next() works on Python 2.6+ and 3, unlike reader.next(),
    # which only exists on Python 2.
    names = next(reader, None)
    if names is None:
        raise ValueError('Input file is empty: no header row found')
    try:
        index = names.index(name)
    except ValueError:
        raise KeyError(
            'Specified name "{}" not found in the file (names are: {})'
            .format(name, ', '.join(names)))
    writer = csv.writer(outfile)
    # names in the first row of output csv
    writer.writerow(names)
    # Rows are streamed lazily; the input is never loaded in full.
    writer.writerows(dedupe_runs(reader, index))


def main(argv=None):
    """Command line entry point.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='''Eliminate runs of identical values from csv data.
        Matching lines are merged to the first occurrence.''')
    parser.add_argument('name', type=str,
                        help='name of the column to be processed')
    parser.add_argument('infile', help='input file',
                        type=argparse.FileType('r'))
    parser.add_argument('outfile', help='output file', nargs='?',
                        type=argparse.FileType('w'), default=sys.stdout)
    args = parser.parse_args(argv)
    try:
        uniq_csv(args.name, args.infile, args.outfile)
    finally:
        # FileType opens the files for us but never closes them. Leave the
        # standard streams alone ('-' maps to stdin/stdout).
        if args.infile is not sys.stdin:
            args.infile.close()
        if args.outfile is not sys.stdout:
            args.outfile.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment