Skip to content

Instantly share code, notes, and snippets.

@mdellavo
Created October 12, 2010 03:11
Show Gist options
  • Save mdellavo/621609 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from csv import DictReader, DictWriter
import sys
import re
from pprint import pprint
from datetime import datetime
def parse_datetime(x):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string into a ``datetime``.

    Raises ValueError when *x* does not describe a real date and time in
    that layout (``datetime.strptime`` performs the validation).
    """
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')


_INT_PATTERN = re.compile(r'^\d+$')
_FLOAT_PATTERN = re.compile(r'^[+-]?\ *(\d+(\.\d*)?|\.\d+)$')
_DATETIME_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$')

# Tried in this order.  Every digit-only string matches both the int and the
# float pattern, so int has to come first; a plain dict only guarantees
# iteration order from Python 3.7 on, hence the explicit sequence.
_CONVERTERS = (
    (_INT_PATTERN, int),
    (_FLOAT_PATTERN, float),
    (_DATETIME_PATTERN, parse_datetime),
)

# Pattern -> converter mapping, kept under its original name for any code
# that looks a converter up by its compiled pattern.
types = dict(_CONVERTERS)


def parse_field(v):
    """Convert a raw CSV cell to int, float or datetime when it looks like one.

    The first pattern in ``_CONVERTERS`` that matches *v* decides which
    converter is applied.

    Returns the converted value, or *v* unchanged when:
      * no pattern matches;
      * the matching converter rejects the text (the patterns are looser
        than the converters, e.g. ``"+ 1.5"`` or ``"2010-13-45 00:00:00"``);
      * *v* is not a string at all.
    """
    for pattern, convert in _CONVERTERS:
        try:
            matched = pattern.match(v)
        except TypeError:
            # Not text, e.g. the None csv.DictReader fills in for a cell
            # missing from a short row: pass it through untouched.
            return v
        if not matched:
            continue
        try:
            return convert(v)
        except ValueError:
            # The pattern matched but the converter disagreed; keep looking
            # and fall back to the raw string instead of aborting the run.
            continue
    return v
def _row_expression(source, label):
    """Compile *source* once and return ``f(row)`` evaluating it against a row dict."""
    # eval() skips leading spaces and tabs in a source string; strip them here
    # so compiling ahead of time accepts the same expressions.
    code = compile(source.lstrip(' \t'), label, 'eval')
    # A fresh globals dict per call, with the row's columns as local names.
    return lambda row: eval(code, {}, row)


def main(option, args):
    """Copy a CSV file, optionally filtering and transforming its rows.

    Parameters:
      option -- unused; accepted so existing callers keep working.
      args   -- ``[input, output, filter_expr, map_expr]``.  ``input`` and
                ``output`` are paths, or ``'-'`` for stdin / stdout.  The two
                expressions are optional Python expressions evaluated with
                the row's column names as local variables: rows for which
                ``filter_expr`` is falsy are dropped, and ``map_expr`` must
                produce the dict that is written for each kept row.

    Returns 0 on success, or 2 when fewer than two arguments are given.
    Empty input (no header row) produces no output and returns 0.
    """
    if len(args) < 2:
        sys.stderr.write('usage: INPUT OUTPUT [FILTER_EXPR [MAP_EXPR]]\n')
        return 2

    # SECURITY: both expressions are run through eval() with full interpreter
    # privileges.  Only ever pass expressions written by the invoking user.
    # Compiling up front avoids re-parsing per row and makes a malformed
    # expression fail before the output file is created or truncated.
    if len(args) > 2:
        filter_func = _row_expression(args[2], '<filter>')
    else:
        filter_func = lambda x: True
    if len(args) > 3:
        map_func = _row_expression(args[3], '<map>')
    else:
        map_func = lambda x: x

    # NOTE(review): under Python 3 the csv docs recommend opening files with
    # newline=''; left as-is to stay compatible with the Python 2 open().
    f_in = sys.stdin if args[0] == '-' else open(args[0], 'r')
    try:
        f_out = sys.stdout if args[1] == '-' else open(args[1], 'w')
        try:
            reader = DictReader(f_in)
            fieldnames = reader.fieldnames
            if not fieldnames:
                # Empty input: there is no header to echo and no rows to copy.
                return 0

            writer = DictWriter(f_out, fieldnames)
            rows = (dict((k, parse_field(row[k])) for k in row)
                    for row in reader)

            # Header row written by hand (a name -> name mapping).
            writer.writerow(dict((name, name) for name in fieldnames))
            writer.writerows(map_func(row) for row in rows if filter_func(row))
        finally:
            # Close only what was opened here, never the process's stdout.
            if f_out is not sys.stdout:
                f_out.close()
    finally:
        if f_in is not sys.stdin:
            f_in.close()
    return 0
# Script entry point: run main() on the command-line arguments (program name
# dropped) and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(None, sys.argv[1:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment