Skip to content

Instantly share code, notes, and snippets.

@samba
Created March 9, 2018 06:10
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samba/e7d341242df3f2b404f296c65ac09c52 to your computer and use it in GitHub Desktop.
Save samba/e7d341242df3f2b404f296c65ac09c52 to your computer and use it in GitHub Desktop.
Extract a subset of named fields from a CSV file
#!/usr/bin/env python
# Loads a CSV file, assuming it has header names, and prints only the specified columns.
# ... like selecting two columns of a 7-column table.
#
# Arguments:
# - field names, comma-separated
# - input filename
#
# Output:
# - the header row for the specified columns, and those fields for all input records.
#
# Usage:
# python csvextract.py "fieldName1,fieldName2" ./source.csv > output.csv
#
#
#
import sys
import csv
import fileinput
def parse_field_expression(expr):
    """Turn a comma-separated field expression into a list of field names.

    Arguments:
    - expr: string such as "fieldName1,fieldName2"

    Returns the pieces between commas, in their original order. Pieces are
    returned verbatim: surrounding whitespace is kept and empty pieces
    (e.g. from a trailing comma) are not dropped.
    """
    separator = ','
    names = expr.split(separator)
    return names
def perform_extract(field_expr, filename):
    """Yield one dict per CSV record, holding only the requested fields.

    Arguments:
    - field_expr: comma-separated field names (see parse_field_expression)
    - filename: input passed to fileinput.input(); the first line read is
      used as the header row by csv.DictReader

    Yields a dict mapping each requested field name to that record's value.

    Raises KeyError, naming the missing field and the header that was
    found, when a requested field is not a column of the input.
    """
    expression = parse_field_expression(field_expr)
    _input = fileinput.input(filename)
    try:
        reader = csv.DictReader(_input)
        for row in reader:
            try:
                record = dict((name, row[name]) for name in expression)
            except KeyError as err:
                # A bare KeyError('x') gives the user nothing to act on;
                # say which field is missing and what the header offered.
                raise KeyError('field %r not found in CSV header %r'
                               % (err.args[0], reader.fieldnames))
            yield record
    finally:
        # fileinput keeps module-global state; without an explicit close an
        # abandoned or failed iteration leaks the file handle and can make
        # a later fileinput.input() call fail as "already active".
        _input.close()
def main(args):
    """Command-line entry point: write the selected CSV columns to stdout.

    Arguments:
    - args: argument list without the program name; args[0] is the
      comma-separated field expression and args[1] is the input filename.
      Any further arguments are ignored.

    Output:
    - the header row for the requested columns, followed by one row per
      input record. The header is written just before the first record, so
      an input that yields no records produces no output.

    Exits with status 2 and a usage message on stderr when fewer than two
    arguments are given. A ValueError raised by the CSV writer is re-raised
    after the offending record has been dumped to stderr.
    """
    if len(args) < 2:
        sys.stderr.write(
            'usage: csvextract.py "fieldName1,fieldName2" ./source.csv\n')
        raise SystemExit(2)

    # Take the column order from the expression itself rather than from
    # dict key order, which is arbitrary on older interpreters.
    fieldnames = parse_field_expression(args[0])
    writer = None
    for record in perform_extract(args[0], args[1]):
        if writer is None:
            writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames)
            writer.writeheader()
        try:
            writer.writerow(record)
        except ValueError:
            # Show the record that could not be written, then let the
            # original exception propagate with its traceback intact.
            sys.stderr.write(repr(record) + '\n')
            raise
if __name__ == '__main__':
    # Run only when executed as a script; drop the program name so main()
    # sees just the field expression and the input filename.
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment