# ryu577/median_of_medians.py

## median_of_medians.py
import os

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import lognorm, fisk


def plot_bias(prcnt=90, dist=lognorm, n_trials=100000, sample_sizes=None,
              out_dir='plots', bins=None):
    """Simulate and plot the bias of the sample percentile as an estimator.

    For every sample size, ``n_trials`` independent samples are drawn from
    ``dist`` (shape parameter 1, location 0) and ``np.percentile`` is
    evaluated on each sample. The mean relative error against the true
    percentile is printed, and a histogram of the estimates -- annotated
    with the true value and with the mean and median of the estimates --
    is saved to ``<out_dir>/sample_<size>.png``.

    Args:
        prcnt: Percentile to estimate, on the 0-100 scale.
        dist: A ``scipy.stats`` continuous distribution that takes a single
            shape parameter (e.g. ``lognorm`` or ``fisk``).
        n_trials: Number of simulated samples per sample size.
        sample_sizes: Iterable of sample sizes to simulate. Defaults to
            ``np.arange(11, 77, 4)``.
        out_dir: Directory that receives the PNG files; created if missing.
        bins: Histogram bin edges. Defaults to ``np.arange(0, 3, .1)``,
            which only covers [0, 3); pass wider bins when the estimates
            are expected to fall outside that range.

    Raises:
        ValueError: If ``n_trials`` is less than 1.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    if sample_sizes is None:
        sample_sizes = np.arange(11, 77, 4)
    if bins is None:
        bins = np.arange(0, 3, .1)

    # Create the output directory up front so that savefig cannot fail on a
    # missing folder after the simulation has already been run.
    os.makedirs(out_dir, exist_ok=True)

    real_val = dist.ppf(prcnt / 100.0, 1, 0)
    for sampl in sample_sizes:
        # One row per trial: drawing all trials at once and reducing along
        # axis 1 replaces n_trials separate rvs/percentile calls.
        samples = dist.rvs(1, 0, size=(n_trials, int(sampl)))
        ests = np.percentile(samples, prcnt, axis=1)
        # Relative error of each estimate; positive means underestimation.
        errs = (real_val - ests) / real_val

        print(np.mean(errs))

        plt.hist(ests, bins=bins)
        plt.axvline(real_val, label="actual percentile", color="black")
        plt.axvline(np.mean(ests),
                    label="avg estimated value of percentile on sample size: "
                    + str(sampl), color="purple")
        plt.axvline(np.percentile(ests, 50),
                    label="Median estimated value of percentile on "
                    "sample size: " + str(sampl), color="orange")
        plt.legend()
        plt.title("Sample size = " + str(sampl))
        plt.savefig(os.path.join(out_dir, 'sample_' + str(sampl) + '.png'))
        plt.close()
        print('processed sample size ' + str(sampl))


# Script entry point: examine the bias of the sample median (the 50th
# percentile) when the data come from the fisk distribution.
if __name__ == "__main__":
    plot_bias(prcnt=50, dist=fisk)
	import numpy as np
	from scipy.stats import lognorm, fisk
	import matplotlib.pyplot as plt


	def plot_bias(prcnt=90, dist=lognorm, n_trials=100000, sample_sizes=None,
	              out_dir='plots', bins=None):
	    """Simulate and plot the bias of the sample percentile as an estimator.

	    For every sample size, ``n_trials`` independent samples are drawn from
	    ``dist`` (shape parameter 1, location 0) and ``np.percentile`` is
	    evaluated on each sample. The mean relative error against the true
	    percentile is printed, and a histogram of the estimates -- annotated
	    with the true value and with the mean and median of the estimates --
	    is saved to ``<out_dir>/sample_<size>.png``.

	    Args:
	        prcnt: Percentile to estimate, on the 0-100 scale.
	        dist: A ``scipy.stats`` continuous distribution that takes a single
	            shape parameter (e.g. ``lognorm`` or ``fisk``).
	        n_trials: Number of simulated samples per sample size.
	        sample_sizes: Iterable of sample sizes to simulate. Defaults to
	            ``np.arange(11, 77, 4)``.
	        out_dir: Directory that receives the PNG files; created if missing.
	        bins: Histogram bin edges. Defaults to ``np.arange(0, 3, .1)``,
	            which only covers [0, 3); pass wider bins when the estimates
	            are expected to fall outside that range.

	    Raises:
	        ValueError: If ``n_trials`` is less than 1.
	    """
	    if n_trials < 1:
	        raise ValueError("n_trials must be at least 1")
	    if sample_sizes is None:
	        sample_sizes = np.arange(11, 77, 4)
	    if bins is None:
	        bins = np.arange(0, 3, .1)

	    # Create the output directory up front so that savefig cannot fail on a
	    # missing folder after the simulation has already been run.
	    os.makedirs(out_dir, exist_ok=True)

	    real_val = dist.ppf(prcnt / 100.0, 1, 0)
	    for sampl in sample_sizes:
	        # One row per trial: drawing all trials at once and reducing along
	        # axis 1 replaces n_trials separate rvs/percentile calls.
	        samples = dist.rvs(1, 0, size=(n_trials, int(sampl)))
	        ests = np.percentile(samples, prcnt, axis=1)
	        # Relative error of each estimate; positive means underestimation.
	        errs = (real_val - ests) / real_val

	        print(np.mean(errs))

	        plt.hist(ests, bins=bins)
	        plt.axvline(real_val, label="actual percentile", color="black")
	        plt.axvline(np.mean(ests),
	                    label="avg estimated value of percentile on sample size: "
	                    + str(sampl), color="purple")
	        plt.axvline(np.percentile(ests, 50),
	                    label="Median estimated value of percentile on "
	                    "sample size: " + str(sampl), color="orange")
	        plt.legend()
	        plt.title("Sample size = " + str(sampl))
	        plt.savefig(os.path.join(out_dir, 'sample_' + str(sampl) + '.png'))
	        plt.close()
	        print('processed sample size ' + str(sampl))


	# Script entry point: examine the bias of the sample median (the 50th
	# percentile) when the data come from the fisk distribution.
	if __name__ == "__main__":
	    plot_bias(50, dist=fisk)