Skip to content

Instantly share code, notes, and snippets.

@jessykate
Created November 8, 2016 12:45
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save jessykate/1587f9345d520474709a8e800a222354 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import csv
'''
usage: ./cobudget_merge.py file1.csv file2.csv ... fileN.csv
combine cobudget allocations across multiple files and print result to stdout.
take the full outer join of the rows in each file, *summing* balances if an
email is present multiple times.
Assumes each file has NO headers, and the first two columns are as follows:
email, amount.
Other columns may be present and will be ignored. If a row has a $0 amount
value, it will be skipped.
'''
def _merge_file(the_file, allocations):
    """Fold one CSV file's rows into ``allocations`` and return the file's total.

    Each usable row contributes ``float(row[1])`` to the running balance kept
    under the whitespace-stripped ``row[0]`` (the email). Columns beyond the
    first two are ignored. Rows that are too short, whose amount is not a
    number, or whose amount is exactly 0 are skipped. A progress line is
    written to stderr for every row that is merged.

    ``allocations`` is updated in place. The return value is the sum of the
    amounts merged from this file.
    """
    allocated_this_file = 0
    # The with-block guarantees the handle is closed even if the csv reader
    # raises part-way through the file.
    with open(the_file, 'r') as open_file:
        for row in csv.reader(open_file, delimiter=","):
            try:
                # row is a list of strings: row[0] is the email, row[1] is the
                # amount. Other columns MAY be present and are ignored.
                email = row[0].strip()
                new_allocation = float(row[1])
            except (IndexError, ValueError):
                # A comment row or some other non-conforming row: skip it.
                # Only the two errors the parsing above can raise are caught,
                # so Ctrl-C and genuine bugs are no longer swallowed.
                continue
            if new_allocation == 0:
                continue

            # Test membership rather than "balance > 0": an email whose
            # running balance is zero or negative has still been seen before.
            if email in allocations:
                sys.stderr.write('%s: found existing balance of %s. appending new allocation of $%f.\n' % (email, allocations[email], new_allocation))
                allocations[email] += new_allocation
            else:
                sys.stderr.write("adding new user %s with allocation of $%s\n" % (email, new_allocation))
                allocations[email] = 0.0 + new_allocation
            allocated_this_file += new_allocation
    return allocated_this_file


def main(argv=None):
    """Merge the allocation files named on the command line.

    ``argv`` defaults to ``sys.argv``; element 0 is the program name and every
    following element is the path of a CSV file to merge. Per-file and overall
    totals, plus a comma-joined list of every email, go to stderr. The merged
    ``email,amount`` rows go to stdout, one per line.
    """
    if argv is None:
        argv = sys.argv

    allocations = {}
    total_allocated = 0
    # Skip argv[0] (the program name); everything after it is an input file.
    for the_file in argv[1:]:
        sys.stderr.write("\n\n**** %s ****\n\n" % the_file)
        allocated_this_file = _merge_file(the_file, allocations)
        sys.stderr.write("\nAllocated this file: %f\n" % allocated_this_file)
        total_allocated += allocated_this_file

    sys.stderr.write("\nTotal allocated: %f\n" % total_allocated)
    sys.stderr.write("\n\n")

    all_emails = ",".join(allocations)
    sys.stderr.write("\nAll Group Emails (should you want to bulk email your group):\n\n")
    sys.stderr.write("%s\n" % all_emails)
    sys.stderr.write("\n\n")

    # items() and a single-argument print() behave the same on Python 2 and 3,
    # unlike the iteritems() / print-statement pair they replace.
    for email, amount in allocations.items():
        print("%s,%f" % (email, amount))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment