Skip to content

Instantly share code, notes, and snippets.

@jbarratt
Created March 20, 2013 03:46
Show Gist options
  • Save jbarratt/5202155 to your computer and use it in GitHub Desktop.
Save jbarratt/5202155 to your computer and use it in GitHub Desktop.
Summing by column with pandas
#!/usr/bin/env python
from collections import Counter
import pandas as pd
import glob
""" Open a bunch of .csv files which have columns that look like
startdate, enddate, account, service, 2010-01, 2010-02, 2010-03, ...
Sum the date-like columns across all of the .csv files and display.
"""
def main(pattern='historical_payments/*.csv'):
    """Sum the month-named columns of every matching .csv file and print them.

    Each file is read with its first line skipped, the columns whose
    name contains a ``YYYY-MM``-style label are summed down the rows, and
    those per-file sums are accumulated per column name across all files.
    The totals are printed one per line as ``<month>: <amount>`` with two
    decimals, ordered by column name.

    Args:
        pattern: glob pattern selecting the .csv files to read. Defaults to
            the original hard-coded ``historical_payments/*.csv``.
    """
    bymonth = Counter()
    for path in glob.glob(pattern):
        # skiprows is because these .csv files start with a comment line
        tab = pd.read_csv(path, skiprows=1)
        # Raw string keeps the regex identical while avoiding the
        # invalid-escape warning that '\d' triggers on modern Python.
        monthly = tab.filter(regex=r'\d{4}-\d{2}').sum(axis=0)
        # Series.items() works on old and new pandas; iteritems() was
        # removed in pandas 2.0.
        for month, amount in monthly.items():
            bymonth[month] += amount
    for month in sorted(bymonth):
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print("%s: %0.2f" % (month, bymonth[month]))
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment