rgkimball/norvig.py

## norvig.py
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

COLW = 10

class Economy(object):
    plt.style.use('bmh')

    def __init__(self, population):
        self.n = len(population)
        self.population = population
        self.stages = [self.population]
        self.fig, self.axes = plt.subplots(nrows=2, ncols=2, figsize=(9, 4))
        self.stats = list()

    @staticmethod
    def transaction(a, b):
        s = a + b
        r = np.random.uniform(0, s)
        return r, s - r

    @staticmethod
    def gini(y):
        """Calculate the Gini coefficient of a numpy array.

         based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
         from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

        :param y: 1-dimensional numpy array, input series
        :return: int, Gini coefficient
        """
        y = sorted(y)
        n = len(y)
        numer = 2 * sum((i + 1) * y[i] for i in range(n))
        denom = n * sum(y)
        return (numer / denom) - (n + 1) / n

    @staticmethod
    def auto_bin(narray, m=5):
        """ Modified Freedman-Diaconis rule to optimize # of histogram bins

        :param narray: numpy array
        :param m: int, optional; minimum number of bins to return
        :return: int, rounded number of optimal bins for histogram of sample
        """
        iqr = np.subtract(*np.percentile(narray, [75, 25]))
        return max(m, int(2 * iqr * narray.size ** (-1/3)))

    def simulate(self, generations, events):
        print(" ".join(self.cols))
        print(" ".join(["-" * len(x) for x in self.cols]))

        print(self.stat_row(0, 0))
        for _ in range(1, generations + 1):
            pop = self.stages[-1].copy()
            for __ in range(events):
                i, j = random.sample(range(len(pop)), 2)
                pop[i], pop[j] = self.transaction(pop[i], pop[j])
            self.stages.append(pop)
            x = self.stat_row(_, events * _)
            if _ % (generations * 0.05) == 0:
                print(x)
        self.plot(generations, events)

    def plot(self, g, e):
        # self.fig.suptitle("Simulation Summary Results", fontsize=14, fontweight='bold')

        # Starting population
        bins = self.auto_bin(self.stages[0])
        self.axes[0][0].hist(self.stages[0], edgecolor=(0, 0, .14),
                             bins=self.auto_bin(self.stages[0]),
                             range=(0, max(self.stages[-1])))
        self.axes[0][0].set_title("Starting population (n={:,.0f})".format(self.n), fontsize=10)
        self.axes[0][0].set_xlabel("Wealth")

        # Ending population
        self.axes[1][0].hist(self.stages[-1], color=(.7, 0, 0, 0.5),
                             edgecolor=(.14, 0, 0),
                             bins=self.auto_bin(self.stages[-1]),
                             range=(0, max(self.stages[-1])))
        self.axes[1][0].set_title("Ending population (generation={:,.0f}; transactions={:,.0f})".format(g, g * e), fontsize=10)
        self.axes[1][0].set_xlabel("Wealth")

        # Quantile dispersion
        self.summary()[self.cols[7:11]].plot(ax=self.axes[0][1])
        self.axes[0][1].set_title("Quantile Dispersion", fontsize=10)
        self.axes[0][1].set_ylabel("Wealth")

        # Gini index
        self.summary()[self.cols[3]].plot(ax=self.axes[1][1])
        self.axes[1][1].set_title("Wealth Inequality", fontsize=10)
        self.axes[1][1].set_ylabel("Gini Coefficient")
        self.axes[1][1].set_xlabel("Generation")

        plt.show()

    def summary(self):
        return pd.DataFrame(data=self.stats, columns=self.cols)

    cols = [x + " " * max(0, COLW - len(x)) for x in \
            ["generation", "cum events", "n", "G", "median", "stdev", "min", "25%", "50%", "90%", "99%", "max"]]

    def stat_row(self, i, j):
        """ Store and return summary statistics for most recent generation """
        p = self.stages[-1]

        self.stats.append([i, j, len(p), self.gini(p), np.median(p), np.std(p),
                           min(p), np.percentile(p, 25), np.percentile(p, 50),
                           np.percentile(p, 90), np.percentile(p, 99), max(p)])

        row = ["{:.2f}".format(x) for x in self.stats[-1]]
        return " ".join([" " * max(0, COLW - len(x)) + x for x in row])
	import random
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	COLW = 10

	class Economy(object):
	plt.style.use('bmh')

	def __init__(self, population):
	self.n = len(population)
	self.population = population
	self.stages = [self.population]
	self.fig, self.axes = plt.subplots(nrows=2, ncols=2, figsize=(9, 4))
	self.stats = list()

	@staticmethod
	def transaction(a, b):
	s = a + b
	r = np.random.uniform(0, s)
	return r, s - r

	@staticmethod
	def gini(y):
	"""Calculate the Gini coefficient of a numpy array.

	based on bottom eq: http://www.statsdirect.com/help/content/image/stat0206_wmf.gif
	from: http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

	:param y: 1-dimensional numpy array, input series
	:return: int, Gini coefficient
	"""
	y = sorted(y)
	n = len(y)
	numer = 2 * sum((i + 1) * y[i] for i in range(n))
	denom = n * sum(y)
	return (numer / denom) - (n + 1) / n

	@staticmethod
	def auto_bin(narray, m=5):
	""" Modified Freedman-Diaconis rule to optimize # of histogram bins

	:param narray: numpy array
	:param m: int, optional; minimum number of bins to return
	:return: int, rounded number of optimal bins for histogram of sample
	"""
	iqr = np.subtract(*np.percentile(narray, [75, 25]))
	return max(m, int(2 * iqr * narray.size ** (-1/3)))

	def simulate(self, generations, events):
	print(" ".join(self.cols))
	print(" ".join(["-" * len(x) for x in self.cols]))

	print(self.stat_row(0, 0))
	for _ in range(1, generations + 1):
	pop = self.stages[-1].copy()
	for __ in range(events):
	i, j = random.sample(range(len(pop)), 2)
	pop[i], pop[j] = self.transaction(pop[i], pop[j])
	self.stages.append(pop)
	x = self.stat_row(_, events * _)
	if _ % (generations * 0.05) == 0:
	print(x)
	self.plot(generations, events)

	def plot(self, g, e):
	# self.fig.suptitle("Simulation Summary Results", fontsize=14, fontweight='bold')

	# Starting population
	bins = self.auto_bin(self.stages[0])
	self.axes[0][0].hist(self.stages[0], edgecolor=(0, 0, .14),
	bins=self.auto_bin(self.stages[0]),
	range=(0, max(self.stages[-1])))
	self.axes[0][0].set_title("Starting population (n={:,.0f})".format(self.n), fontsize=10)
	self.axes[0][0].set_xlabel("Wealth")

	# Ending population
	self.axes[1][0].hist(self.stages[-1], color=(.7, 0, 0, 0.5),
	edgecolor=(.14, 0, 0),
	bins=self.auto_bin(self.stages[-1]),
	range=(0, max(self.stages[-1])))
	self.axes[1][0].set_title("Ending population (generation={:,.0f}; transactions={:,.0f})".format(g, g * e), fontsize=10)
	self.axes[1][0].set_xlabel("Wealth")

	# Quantile dispersion
	self.summary()[self.cols[7:11]].plot(ax=self.axes[0][1])
	self.axes[0][1].set_title("Quantile Dispersion", fontsize=10)
	self.axes[0][1].set_ylabel("Wealth")

	# Gini index
	self.summary()[self.cols[3]].plot(ax=self.axes[1][1])
	self.axes[1][1].set_title("Wealth Inequality", fontsize=10)
	self.axes[1][1].set_ylabel("Gini Coefficient")
	self.axes[1][1].set_xlabel("Generation")

	plt.show()

	def summary(self):
	return pd.DataFrame(data=self.stats, columns=self.cols)

	cols = [x + " " * max(0, COLW - len(x)) for x in \
	["generation", "cum events", "n", "G", "median", "stdev", "min", "25%", "50%", "90%", "99%", "max"]]

	def stat_row(self, i, j):
	""" Store and return summary statistics for most recent generation """
	p = self.stages[-1]

	self.stats.append([i, j, len(p), self.gini(p), np.median(p), np.std(p),
	min(p), np.percentile(p, 25), np.percentile(p, 50),
	np.percentile(p, 90), np.percentile(p, 99), max(p)])

	row = ["{:.2f}".format(x) for x in self.stats[-1]]
	return " ".join([" " * max(0, COLW - len(x)) + x for x in row])