Skip to content

Instantly share code, notes, and snippets.

@briandailey
Created March 22, 2012 13:52
Show Gist options
  • Save briandailey/2158466 to your computer and use it in GitHub Desktop.
Save briandailey/2158466 to your computer and use it in GitHub Desktop.
Pandas = Magic.
from pandas import read_csv
df = read_csv('all_hospitals.csv')
def by_hcpcs_code():
hcpcs_table = {}
grouped_by_upin = df.groupby('PUF_PROVIDER_NUMBER')
for upin, by_upin in grouped_by_upin:
hcpcs_table.setdefault(upin, {})
for i in range(1, 49):
grouped_by_hcpcs = by_upin.groupby('SERVICE_HCPCS_%s' % i)
for hcpcs, details in grouped_by_hcpcs:
hcpcs_table[upin].setdefault(hcpcs, {})
count = details['SERVICE_HCPCS_%s' % i].count()
charges = details['SERVICE_TOTAL_CHARGES_%s' % i].sum()
costs = details['SERVICE_COST_%s' % i].sum()
hcpcs_table[upin][hcpcs]['count'] = hcpcs_table[upin][hcpcs].setdefault('count', 0) + count
hcpcs_table[upin][hcpcs]['charges'] = hcpcs_table[upin][hcpcs].setdefault('charges', 0) + charges
hcpcs_table[upin][hcpcs]['costs'] = hcpcs_table[upin][hcpcs].setdefault('costs', 0) + costs
print "UPIN,HCPCS,count,charges,cost"
for upin, hcpcs_codes in hcpcs_table.items():
for hcpcs_code, details in hcpcs_codes.items():
print "%s,%s,%s,%s,%s" % (upin, hcpcs_code, details['count'], details['charges'], details['costs'])
def by_service_code():
table = {}
grouped_by_upin = df.groupby('PUF_PROVIDER_NUMBER')
for upin, by_upin in grouped_by_upin:
table.setdefault(upin, {})
for i in range(1, 49):
grouped_by_sc = by_upin.groupby('SERVICE_REVENUE_CODE_%s' % i)
for sc, details in grouped_by_sc:
table[upin].setdefault(sc, {})
count = details['SERVICE_REVENUE_CODE_%s' % i].count()
charges = details['SERVICE_TOTAL_CHARGES_%s' % i].sum()
costs = details['SERVICE_COST_%s' % i].sum()
table[upin][sc]['count'] = table[upin][sc].setdefault('count', 0) + count
table[upin][sc]['charges'] = table[upin][sc].setdefault('charges', 0) + charges
table[upin][sc]['costs'] = table[upin][sc].setdefault('costs', 0) + costs
print "UPIN,Service Code,count,charges,cost"
for upin, sc_codes in table.items():
for sc_code, details in sc_codes.items():
print "%s,%s,%s,%s,%s" % (upin, str(int(sc_code)).zfill(4), details['count'], details['charges'], details['costs'])
by_service_code()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment