Skip to content

Instantly share code, notes, and snippets.

@jehiah
Last active February 10, 2017 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jehiah/b65aab85d0a21356e4244bf36803d9ca to your computer and use it in GitHub Desktop.
Save jehiah/b65aab85d0a21356e4244bf36803d9ca to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Pivot a CSV file
$ cat <<EOF | python pivot_csv.py --numeric --headers
> row,column,value
> r1,c1,1
> r1,c2,1
> r1,c1,5
> r2,c2,5
> r3,c1,2
> EOF
row,c1,c2
r1,6,1
r3,2,
r2,,5
"""
import sys
import csv
import tornado.options
from collections import defaultdict
from decimal import Decimal
def run():
o = tornado.options.options
if o.numeric:
data = defaultdict(lambda: defaultdict(Decimal))
else:
data = defaultdict(dict)
columns = set()
headers = None
for line in csv.reader(sys.stdin):
if headers is None and o.headers:
headers = line
continue
columns.add(line[o.pivot_column])
if o.numeric:
data[line[o.row_column]][line[o.pivot_column]] += Decimal(line[o.value_column])
else:
data[line[o.row_column]][line[o.pivot_column]] = line[o.value_column]
columns = sorted(columns)
if o.reverse_column_sort:
columns = reversed(columns)
w = csv.writer(sys.stdout)
row_key = headers[o.row_column] if headers else 'key'
w.writerow([row_key] + columns)
for key, row_data in sorted(data.items()):
row = [key]
for column in columns:
row.append(str(row_data.get(column, '')))
w.writerow(row)
if __name__ == "__main__":
tornado.options.define("headers", default=False, type=bool, help="first input row contains column headers")
tornado.options.define("row_column", default=0, type=int, help="key for rows (starting at 0)")
tornado.options.define("pivot_column", default=1, type=int, help="column to pivot on (starting at 0)")
tornado.options.define("value_column", default=2, type=int, help="value for rows (starting at 0)")
tornado.options.define("numeric", default=True, type=bool, help="converts value to Decimal (and sum's if necesary)")
tornado.options.define("reverse_column_sort", default=False, type=bool, help="reverse the column sort order")
tornado.options.parse_command_line()
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment