Skip to content

Instantly share code, notes, and snippets.

@jmoy
Created March 5, 2011 06:14
Show Gist options
  • Save jmoy/856171 to your computer and use it in GitHub Desktop.
Save jmoy/856171 to your computer and use it in GitHub Desktop.
Compute the weighted median
"""
Calculate the weighted median
(c) Jyotirmoy Bhattacharya [jyotirmoy@jyotirmoy.net], 2011
Licence: GPL
"""
import itertools as it
def ecdf(vw):
ans = []
"""
Calculate the empirical cumulative distribution function
vw is a sequence of tuples (v,w) where v is a value and w is its weight
"""
sumweights = 0.0
vw=sorted(vw)
for k,g in it.groupby(vw,lambda t:t[0]):
weights=[t[1] for t in g]
if any(x is None for x in weights):
return None
sumweights+=sum(weights)
ans.append((k,sumweights))
return [(v,w/sumweights) for (v,w) in ans]
def wmedian(obs):
"""
Calculate the weighted median.
obs is a sequence of tuples (v,w) where v is a value and w is its weight.
"""
tol=1e-13 #To take care of rounding errors in ecdf
e = ecdf(obs)
if not e:
return None
for i in range(len(e)):
if e[i][1]>=0.5:
if abs(e[i][1]-0.5)<tol:
return (e[i+1][0]+e[i][0])/2
else:
return e[i][0]
if __name__=="__main__":
testcases = [[(1,1)],[(1,3)],[(1,4)],
[(1,1),(1,1),(1,1)],
[(2,1),(3,1),(1,1)],
[(2,2),(1,3),(3,1)],
]
for t in testcases:
print(t,wmedian(t))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment