dnorton/parse_csv.py

## parse_csv.py
from __future__ import with_statement
import csv
import re
"""
Parse a CSV file with multiple column counts

e.g., foo_1, foo_2

The result will be a single column, foo

Values will be "; " separated
"""

# Build header_map: base column name -> list of header fields sharing it.
# A field such as "foo_1" or "foo_2" is filed under "foo"; a field with no
# "_<digits>" part (e.g. "value") is filed under its own name.

# Compiled once, outside the loop. Raw string so that \w and \d are passed to
# the regex engine verbatim instead of relying on Python leaving unknown
# string escapes untouched.
pattern = re.compile(r'(\w+)_\d+')

header_map = {}
with open('test.csv', 'r') as myfile:
    # Only the first line of the file (the header row) is consumed here.
    for field in myfile.readline().strip().split("|"):
        match = pattern.search(field)
        if match is not None:
            header_key = match.group(1)
        else:
            header_key = field

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("header_key = %s" % header_key)

        # setdefault creates the list on first sight of a key, replacing the
        # has_key() check (dict.has_key is gone in Python 3).
        header_map.setdefault(header_key, []).append(field)

print(header_map)

# Register a pipe-delimited dialect under the name 'pipes' so the reader
# below can select it by name.
csv.register_dialect('pipes', delimiter="|")

with open('test.csv') as csvfile:
    # No fieldnames are passed, so DictReader uses the first row as the keys.
    testreader = csv.DictReader(csvfile, dialect='pipes')
    for row in testreader:
        # list(row) gives the row's keys as a plain list, so the output is a
        # list on both Python 2 and Python 3 (where keys() is a view object).
        print(list(row))

## test.csv
foo_1|foo_2|foo_3|bar_1|bar_2|value
a|b|c|teapot|kettle|alone
1|6|zebra|coffee|filter|always
	from __future__ import with_statement
	import csv
	import re
	"""
	Parse a CSV file with multiple column counts

	e.g., foo_1, foo_2

	The result will be a single column, foo

	Values will be "; " separated
	"""

	# Build header_map: base column name -> list of header fields sharing it.
	# A field such as "foo_1" or "foo_2" is filed under "foo"; a field with no
	# "_<digits>" part (e.g. "value") is filed under its own name.

	# Compiled once, outside the loop. Raw string so that \w and \d are passed
	# to the regex engine verbatim.
	pattern = re.compile(r'(\w+)_\d+')

	header_map = {}
	with open('test.csv', 'r') as myfile:
	    # Only the first line of the file (the header row) is consumed here.
	    # The header is split on a bare "|": a backslash-pipe separator would
	    # never occur in the data and would leave the header as one field.
	    for field in myfile.readline().strip().split("|"):
	        match = pattern.search(field)
	        if match is not None:
	            header_key = match.group(1)
	        else:
	            header_key = field

	        # Single-argument print(...) behaves the same on Python 2 and 3.
	        print("header_key = %s" % header_key)

	        # setdefault creates the list on first sight of a key, replacing
	        # the has_key() check (dict.has_key is gone in Python 3).
	        header_map.setdefault(header_key, []).append(field)

	print(header_map)

	# Register a pipe-delimited dialect under the name 'pipes'. The delimiter
	# must be exactly one character, so it is a bare "|" with no backslash.
	csv.register_dialect('pipes', delimiter="|")

	with open('test.csv') as csvfile:
	    # No fieldnames are passed, so DictReader uses the first row as keys.
	    testreader = csv.DictReader(csvfile, dialect='pipes')
	    for row in testreader:
	        # list(row) gives the row's keys as a plain list on both
	        # Python 2 and Python 3 (where keys() is a view object).
	        print(list(row))
	foo_1|foo_2|foo_3|bar_1|bar_2|value
	a|b|c|teapot|kettle|alone
	1|6|zebra|coffee|filter|always