tclancy/gist:5753154

## gistfile1.py
from collections import defaultdict
import operator
import os
from xlrd import open_workbook

"""
starting from the "raw" data, which I'm assuming is the first tab, Sorted by Customer
group everything by part number and get the total sold
then list the items in descending order of popularity
Do you need any other data beyond part and total sold? Looks like I should sum the Extended Price column as well?
"""

COL_QUANTITY = 1
COL_ITEM = 2
COL_DESCRIPTION = 9
COL_EXTENDED_PRICE = 11

wb = open_workbook('ytd_june.xls')
quantities = defaultdict(lambda: [0, 0.0, ''])
by_customer = wb.sheets()[0]
for row in range(1, by_customer.nrows):
    qty = int(by_customer.cell(row, COL_QUANTITY).value)
    item = str(by_customer.cell(row, COL_ITEM).value).strip()
    price = float(by_customer.cell(row, COL_EXTENDED_PRICE).value)
    current = quantities[item]
    quantities[item][0] = current[0] + qty
    quantities[item][1] = quantities[item][1] + price
    quantities[item][2] = str(by_customer.cell(row, COL_DESCRIPTION).value).strip()

raw = []
for k in quantities:
    data = quantities[k]
    raw.append([k, data[0], data[1], data[2]])


f = open('sorted.csv', 'w')
f.write('Part,Description,Count,Total%s' % os.linesep)
for row in raw:
    f.write('%s,%s,%d,%.2f%s' % (row[0], row[3], row[1], row[2], os.linesep))
	from collections import defaultdict
	import operator
	import os
	from xlrd import open_workbook

	"""
	starting from the "raw" data, which I'm assuming is the first tab, Sorted by Customer
	group everything by part number and get the total sold
	then list the items in descending order of popularity
	Do you need any other data beyond part and total sold? Looks like I should sum the Extended Price column as well?
	"""

	COL_QUANTITY = 1
	COL_ITEM = 2
	COL_DESCRIPTION = 9
	COL_EXTENDED_PRICE = 11

	wb = open_workbook('ytd_june.xls')
	quantities = defaultdict(lambda: [0, 0.0, ''])
	by_customer = wb.sheets()[0]
	for row in range(1, by_customer.nrows):
	qty = int(by_customer.cell(row, COL_QUANTITY).value)
	item = str(by_customer.cell(row, COL_ITEM).value).strip()
	price = float(by_customer.cell(row, COL_EXTENDED_PRICE).value)
	current = quantities[item]
	quantities[item][0] = current[0] + qty
	quantities[item][1] = quantities[item][1] + price
	quantities[item][2] = str(by_customer.cell(row, COL_DESCRIPTION).value).strip()

	raw = []
	for k in quantities:
	data = quantities[k]
	raw.append([k, data[0], data[1], data[2]])


	f = open('sorted.csv', 'w')
	f.write('Part,Description,Count,Total%s' % os.linesep)
	for row in raw:
	f.write('%s,%s,%d,%.2f%s' % (row[0], row[3], row[1], row[2], os.linesep))