Skip to content

Instantly share code, notes, and snippets.

@robcowie
Created May 22, 2011 20:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save robcowie/985882 to your computer and use it in GitHub Desktop.
Save robcowie/985882 to your computer and use it in GitHub Desktop.
Answer to stackoverflow question 6089772
field0 field1 field2 field3 field4 field5 field6 field7 field8 field9 field10 field11 field12 field13 field14 field15
10 0 2 1 Right Right Right 5.76765674196 0.0310912272139 0.0573603238282 0.0582901376612 0.0648936500524 0.0655294305058 0.0720571099855 0.0748289246137 0.446033755751
3 1 3 0 Left Left Right 8.00982745764 0.0313840132052 0.0576521406854 0.0585844966069 0.0644905497442 0.0653386429438 0.0712603578765 0.0740345755708 0.2641076191
5 19 1 0 Right Left Left 4.69440026591 0.0313852052224 0.0583165354345 0.0592403274967 0.0659404609478 0.0666070804916 0.0715314027001 0.0743022054775 0.465994962101
3 1 4 2 Left Right Left 9.58648184552 0.0303649003017 0.0571579895338 0.0580911765412 0.0634304670863 0.0640132919609 0.0702920967445 0.0730697946335 0.556525293
9 0 0 7 Left Left Left 7.65374257547 0.030318719717 0.0568551744109 0.0577785415066 0.0640577002605 0.0647226582655 0.0711459854908 0.0739256050784 1.23421547397
# -*- coding: utf-8 -*-
from glob import glob
from os import path
import csv
from collections import namedtuple, defaultdict
def type_fix_value(value):
    """Convert *value* to a number when possible.

    ``int`` is attempted first, then ``float``; the first conversion that
    does not raise ``ValueError`` wins.  If both raise ``ValueError`` the
    value is returned unchanged.  Any other exception propagates.
    """
    for convert in (int, float):
        try:
            converted = convert(value)
        except ValueError:
            # Not parseable as this type; fall through to the next candidate.
            pass
        else:
            return converted
    return value
def parse_row(row):
    """Return a list with ``type_fix_value`` applied to each field of *row*."""
    return list(map(type_fix_value, row))
def main(args):
    """Group the rows of each tab-separated ``*.csv`` file and print the groups.

    For every ``*.csv`` file directly inside ``args.filedir`` the first line
    is split on tabs to obtain the column headers.  Each remaining non-blank
    row is type-converted with ``parse_row``, wrapped in a ``datarow``
    namedtuple built from those headers, and bucketed by the run of integer
    values at the start of the row.  The mapping of key tuple to rows is
    pretty-printed once per file.

    Args:
        args: object exposing a ``filedir`` attribute (the directory to scan).
    """
    import pprint

    for datafile in glob(path.join(args.filedir, '*.csv')):
        with open(datafile) as srcfile:
            # The builtin next() works on Python 2.6+ and 3.x, unlike the
            # Python 2-only file.next(); the default avoids a StopIteration
            # escaping when the file is empty.
            header_line = next(srcfile, None)
            if header_line is None:
                continue
            headers = header_line.strip().split('\t')
            datarow = namedtuple('datarow', headers)
            data = csv.reader(srcfile, delimiter='\t')

            ## wrap with row parser and namedtuple
            ## (blank lines come back from csv.reader as [] and cannot fill
            ## the namedtuple, so they are skipped)
            data = (parse_row(row) for row in data if row)
            data = (datarow(*row) for row in data)

            ## Group by the leading integer columns
            grouped_rows = defaultdict(list)
            for row in data:
                # Stop at the first non-int field so that a later column
                # which merely happens to hold an integer-looking value does
                # not leak into the grouping key.
                leading_ints = []
                for field in row:
                    if not isinstance(field, int):
                        break
                    leading_ints.append(field)
                grouped_rows[tuple(leading_ints)].append(row)

            ## DO SOMETHING INTERESTING WITH THE GROUPS
            pprint.pprint(dict(grouped_rows))
if __name__ == '__main__':
    # Command-line entry point: validate --file-dir, then hand off to main().
    import argparse

    parser = argparse.ArgumentParser()
    # required=True: if the option is omitted, filedir would be None and
    # path.isdir(None) raises TypeError instead of printing a usage error.
    parser.add_argument('--file-dir', dest='filedir', required=True)
    args = parser.parse_args()
    if not path.isdir(args.filedir):
        parser.error('--file-dir should point to a directory')
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment