Last active
December 18, 2015 08:19
-
-
Save tclancy/5753154 to your computer and use it in GitHub Desktop.
Really ugly and rough code to sort some content out of Excel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import operator | |
import os | |
from xlrd import open_workbook | |
""" | |
starting from the "raw" data, which I'm assuming is the first tab, Sorted by Customer | |
group everything by part number and get the total sold | |
then list the items in descending order of popularity | |
Do you need any other data beyond part and total sold? Looks like I should sum the Extended Price column as well? | |
""" | |
COL_QUANTITY = 1 | |
COL_ITEM = 2 | |
COL_DESCRIPTION = 9 | |
COL_EXTENDED_PRICE = 11 | |
wb = open_workbook('ytd_june.xls') | |
quantities = defaultdict(lambda: [0, 0.0, '']) | |
by_customer = wb.sheets()[0] | |
for row in range(1, by_customer.nrows): | |
qty = int(by_customer.cell(row, COL_QUANTITY).value) | |
item = str(by_customer.cell(row, COL_ITEM).value).strip() | |
price = float(by_customer.cell(row, COL_EXTENDED_PRICE).value) | |
current = quantities[item] | |
quantities[item][0] = current[0] + qty | |
quantities[item][1] = quantities[item][1] + price | |
quantities[item][2] = str(by_customer.cell(row, COL_DESCRIPTION).value).strip() | |
raw = [] | |
for k in quantities: | |
data = quantities[k] | |
raw.append([k, data[0], data[1], data[2]]) | |
f = open('sorted.csv', 'w') | |
f.write('Part,Description,Count,Total%s' % os.linesep) | |
for row in raw: | |
f.write('%s,%s,%d,%.2f%s' % (row[0], row[3], row[1], row[2], os.linesep)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment