Skip to content

Instantly share code, notes, and snippets.

@hirokiky
Created April 28, 2017 05:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hirokiky/8d7faf0017214600f021644d9f065d65 to your computer and use it in GitHub Desktop.
Save hirokiky/8d7faf0017214600f021644d9f065d65 to your computer and use it in GitHub Desktop.
Aggregator that groups a list of dicts by a key and returns a dict of dicts (group key => aggregated fields).
import statistics
class Field:
    """Describe one aggregated output value.

    ``field`` is stored as the name of the source entry to read, and
    ``agg_func`` is the callable used to collapse the collected values into
    a single result.  When no callable is supplied, the default simply
    returns the first collected value.
    """

    def __init__(self, field, agg_func=lambda l: l[0]):
        self.agg_func = agg_func
        self.field = field

    def aggregate(self, l):
        """Return the result of applying the configured ``agg_func`` to ``l``."""
        reducer = self.agg_func
        return reducer(l)
class Mean(Field):
    """Field whose aggregate is the arithmetic mean of the collected values.

    Falsy entries (for example ``None``) are counted as 0.  The inherited
    ``agg_func`` is not consulted by this subclass.
    """

    def aggregate(self, l):
        """Return ``statistics.mean`` of ``l`` with falsy entries replaced by 0."""
        return statistics.mean([value or 0 for value in l])
class Median(Field):
    """Field whose aggregate is the median of the collected values.

    Falsy entries (for example ``None``) are counted as 0.  The inherited
    ``agg_func`` is not consulted by this subclass.
    """

    def aggregate(self, l):
        """Return ``statistics.median`` of ``l`` with falsy entries replaced by 0."""
        return statistics.median([value or 0 for value in l])
class Count(Field):
    """Field whose aggregate is the number of collected values.

    The inherited ``agg_func`` is not consulted by this subclass.
    """

    def aggregate(self, l):
        """Return how many entries ``l`` contains."""
        size = len(l)
        return size
def agg(data, key, **fields):
    """Group records by ``key`` and aggregate the requested fields per group.

    Args:
        data: Iterable of dict-like records.  Each record is indexed with
            ``key`` and with the source name of every requested field.
        key: Name of the entry whose value identifies a record's group.
        **fields: Maps an output name to a field spec.  A plain string is
            taken as the source entry name and wrapped in ``Field``; any
            other object is used as given and must provide a ``field``
            attribute and an ``aggregate(values)`` method.

    Returns:
        A dict mapping each group value to a dict of
        ``{output name: aggregated value}``.  Empty ``data`` yields ``{}``.

    Raises:
        KeyError: If a record lacks ``key`` or a requested source entry
            (for plain-dict records).
    """
    specs = {
        name: Field(spec) if isinstance(spec, str) else spec
        for name, spec in fields.items()
    }

    # Pass 1: collect the raw values of every requested field, per group.
    collected = {}
    for record in data:
        group = record[key]
        buckets = collected.get(group)
        if buckets is None:
            buckets = collected[group] = {name: [] for name in specs}
        for name, spec in specs.items():
            buckets[name].append(record[spec.field])

    # Pass 2: reduce each collected list into a fresh result mapping, so the
    # raw lists and the aggregated values never share one dict.
    return {
        group: {
            name: spec.aggregate(buckets[name])
            for name, spec in specs.items()
        }
        for group, buckets in collected.items()
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment