Skip to content

Instantly share code, notes, and snippets.

@zhangqiaorjc
Created December 20, 2016 05:19
Show Gist options
  • Save zhangqiaorjc/55e3a71cf19ce41d981a8013e6873a09 to your computer and use it in GitHub Desktop.
Save zhangqiaorjc/55e3a71cf19ce41d981a8013e6873a09 to your computer and use it in GitHub Desktop.
Sample from an empirical CDF specified by a set of points.
import numpy as np
import numpy.random as random
from scipy.stats import rv_continuous
class custom_distribution:
    """Sampler for a distribution described by points on its CDF.

    Samples are produced by inverse-transform sampling: a probability drawn
    from ``rng.uniform(0, 1, size)`` is mapped back to an x value by linear
    interpolation between the supplied CDF points.
    """

    def __init__(self, rng, xp, fp):
        """Validate and store the CDF points.

        Args:
            rng: Random source exposing ``uniform(low, high, size)``,
                e.g. ``numpy.random.RandomState``.
            xp: x-coordinates of the CDF points; must be strictly increasing.
            fp: Cumulative probabilities at ``xp``; must be non-decreasing
                and the same length as ``xp``.

        Raises:
            ValueError: If the inputs are empty, differ in length, ``xp`` is
                not strictly increasing, or ``fp`` decreases anywhere.
        """
        if len(xp) == 0 or len(xp) != len(fp):
            raise ValueError("xp and fp must be non-empty and of equal length")
        # The original computed this check but discarded the result, so bad
        # input went unnoticed; enforce it explicitly.
        if not np.all(np.diff(xp) > 0):
            raise ValueError("xp must be strictly increasing")
        # fp is used as the x-axis of np.interp in sample(), which requires
        # increasing coordinates to give meaningful results.
        if not np.all(np.diff(fp) >= 0):
            raise ValueError("fp must be non-decreasing")
        self.rng = rng
        self.xp = xp
        self.fp = fp

    def sample(self, size=1):
        """Draw samples from the distribution.

        Args:
            size: Passed through to ``rng.uniform`` as the output size.

        Returns:
            list: The interpolated x values, one entry per element along the
            first axis of the drawn probabilities.
        """
        sampled_prob = self.rng.uniform(0, 1, size)
        # Invert the CDF: look up x given y, so fp plays the role of the
        # interpolation x-axis and xp the y-axis. A single vectorized call
        # replaces the per-element Python loop.
        return list(np.interp(sampled_prob, self.fp, self.xp))
if __name__ == "__main__":
    # Demo: build a sampler from a hand-specified CDF and check that the
    # empirical percentiles of the samples land near the matching CDF points.
    xp = [0, 10000, 20000, 30000, 50000, 80000, 200000, 1e+06, 2e+06, 5e+06, 1e+07, 3e+07]
    fp = [0, 0.15, 0.2, 0.3, 0.4, 0.53, 0.6, 0.7, 0.8, 0.9, 0.97, 1]
    # Fixed seed so the run is repeatable.
    rng = random.RandomState(seed=1)
    custom_d = custom_distribution(rng, xp, fp)
    sampled_points = custom_d.sample(size=10000)
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(np.percentile(sampled_points, 20))
    print(np.percentile(sampled_points, 40))
    print(np.percentile(sampled_points, 80))
    # Output recorded by the original author (printed precision may differ
    # between Python/NumPy versions):
    # 19978.8514227
    # 50131.0919101
    # 1964479.08035
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment