cstrelioff/README.md

## README.md

      
    Raw
  

              README.md
            
          
    A script that replicates all examples in my blog post on inferring probabilities--
see my bayes post for more information.
Run all the examples

    $ python ex001_bayes.py
Or,
    $ chmod u+x ex001_bayes.py
    $ ./ex001_bayes.py
Use the classes defined in the file

Start Python (or ipython if you like) in the directory containing the ex001_bayes.py file.
    $ python
Then import the file and try out a new example by

creating new data
specifying a prior
creating a posterior
plotting the results of inference

    >>> import numpy as np
    >>> from ex001_bayes import prior, posterior
    >>> data = np.random.choice([0,1], 500, p=[0.01, 0.99])
    >>> pri = prior(np.arange(0.0, 1.01, 0.01))
    >>> post = posterior(data, pri)
    >>> post.plot()
That's it!

  
## ex001_bayes.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2014 Christopher C. Strelioff <chris.strelioff@gmail.com>
#
# Distributed under terms of the MIT license.

"""
ex001_bayes.py
"""
from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt

# use matplotlib style sheet
try:
    plt.style.use('ggplot')
except:
    # version of matplotlib might not be recent
    pass


class likelihood:
    def __init__(self, data):
        """Likelihood for binary data."""
        self.counts = {s:0 for s in ['0', '1']}
        self._process_data(data)

    def _process_data(self, data):
        """Process data."""
        temp = [str(x) for x in data]
        for s in ['0', '1']:
            self.counts[s] = temp.count(s)

        if len(temp) != sum(self.counts.values()):
            raise Exception("Passed data is not all 0`s and 1`s!")

    def _process_probabilities(self, p0):
        """Process probabilities."""
        n0 = self.counts['0']
        n1 = self.counts['1']

        if p0 != 0 and p0 != 1:
            # typical case
            logpr_data = n0*np.log(p0) + \
                         n1*np.log(1.-p0)
            pr_data = np.exp(logpr_data)
        elif p0 == 0 and n0 != 0:
            # p0 can't be 0 if n0 is not 0
            logpr_data = -np.inf
            pr_data = np.exp(logpr_data)
        elif p0 == 0 and n0 == 0:
            # data consistent with p0=0
            logpr_data = n1*np.log(1.-p0)
            pr_data = np.exp(logpr_data)
        elif p0 == 1 and n1 != 0:
            # p0 can't be 1 if n1 is not 0
            logpr_data = -np.inf
            pr_data = np.exp(logpr_data)
        elif p0 == 1 and n1 == 0:
            # data consistent with p0=1
            logpr_data = n0*np.log(p0)
            pr_data = np.exp(logpr_data)

        return pr_data, logpr_data

    def prob(self, p0):
        """Get probability of data."""
        pr_data, _ = self._process_probabilities(p0)

        return pr_data

    def log_prob(self, p0):
        """Get log of probability of data."""
        _, logpr_data = self._process_probabilities(p0)

        return logpr_data


class prior:
    def __init__(self, p_list, p_probs=None):
        """The prior.

        p_list: list of allowed p0's
        p_probs: [optional] dict of prior probabilities
                 default is uniform
        """
        if p_probs:
            # make sure prior is normalized
            norm = sum(p_probs.values())
            self.log_pdict = {p:np.log(p_probs[p]) - \
                                np.log(norm) for p in p_list}
        else:
            n = len(p_list)
            self.log_pdict = {p:-np.log(n) for p in p_list}

    def __iter__(self):
        return iter(sorted(self.log_pdict))

    def log_prob(self, p):
        """Get log prior probability for passed p0."""
        if p in self.log_pdict:
            return self.log_pdict[p]
        else:
            return -np.inf

    def prob(self, p):
        """Get prior probability for passed p0."""
        if p in self.log_pdict:
            return np.exp(self.log_pdict[p])
        else:
            return 0.0


class posterior:
    def __init__(self, data, prior):
        """The posterior.

        data: a data sample as list
        prior: an instance of the prior class
        """
        self.likelihood = likelihood(data)
        self.prior = prior

        self._process_posterior()

    def _process_posterior(self):
        """Process the posterior using passed data and prior."""

        numerators = {}
        denominator = -np.inf
        for p in self.prior:
            numerators[p] = self.likelihood.log_prob(p) + \
                            self.prior.log_prob(p)

            if numerators[p] != -np.inf:
                # np.logaddexp(-np.inf, -np.inf) issues warning
                # skip-- this is adding 0 + 0
                denominator = np.logaddexp(denominator,
                                           numerators[p])

        # save denominator in Bayes' Theorem
        self.log_marg_likelihood = denominator

        # calculate posterior
        self.log_pdict = {}
        for p in self.prior:
            self.log_pdict[p] = numerators[p] - \
                                self.log_marg_likelihood

    def log_prob(self, p):
        """Get log posterior probability for passed p."""
        if p in self.log_pdict:
            return self.log_pdict[p]
        else:
            return -np.inf

    def prob(self, p):
        """Get posterior probability for passed p."""
        if p in self.log_pdict:
            return np.exp(self.log_pdict[p])
        else:
            return 0.0

    def plot(self):
        """Plot the inference resuults."""

        f, ax= plt.subplots(3, 1, figsize=(8, 6), sharex=True)

        # get candidate probabilities from prior
        x = [p for p in self.prior]

        # plot prior
        y1 = np.array([self.prior.prob(p) for p in x])
        ax[0].stem(x, y1, linefmt='r-', markerfmt='ro', basefmt='w-')
        ax[0].set_ylabel("Prior", fontsize=14)
        ax[0].set_xlim(-0.05, 1.05)
        ax[0].set_ylim(0., 1.05*np.max(y1))

        # plot likelihood
        y2 = np.array([self.likelihood.prob(p) for p in x])
        ax[1].stem(x, y2, linefmt='k-', markerfmt='ko', basefmt='w-')
        ax[1].set_ylabel("Likelihood", fontsize=14)
        ax[1].set_xlim(-0.05, 1.05)
        ax[1].set_ylim(0., 1.05*np.max(y2))

        # plot posterior
        y3 = np.array([self.prob(p) for p in x])
        ax[2].stem(x, y3, linefmt='b-', markerfmt='bo', basefmt='w-')
        ax[2].set_ylabel("Posterior", fontsize=14)
        ax[2].set_xlabel("Probability of Zero", fontsize=14)
        ax[2].set_xlim(-0.05, 1.05)
        ax[2].set_ylim(0., 1.05*np.max(y3))

        plt.tight_layout()
        plt.show()

if __name__ ==  '__main__':
    # data
    data1 = [0,0,0,0,1,1,0,0,0,1]

    # prior
    A1 = prior([0.2, 0.4, 0.6, 0.8])

    # posterior
    post1 = posterior(data1, A1)
    post1.plot()

    # prior -- will be normalized by class
    A2 = prior([0.2, 0.4, 0.6, 0.8],
               {0.2:1, 0.4:20, 0.6:1, 0.8:1})

    # posterior
    post2 = posterior(data1, A2)
    post2.plot()

    # set probability of 0
    p0 = 0.23
    # set rng seed to 42
    np.random.seed(42)
    # generate data
    data2 = np.random.choice([0,1], 500, p=[p0, 1.-p0])

    # prior
    A3 = prior(np.arange(0.0, 1.01, 0.01))

    # posterior
    post3 = posterior(data2, A3)
    post3.plot()
	#! /usr/bin/env python
	# -- coding: utf-8 --
	# vim:fenc=utf-8
	#
	# Copyright © 2014 Christopher C. Strelioff <chris.strelioff@gmail.com>
	#
	# Distributed under terms of the MIT license.

	"""
	ex001_bayes.py
	"""
	from __future__ import division, print_function
	import numpy as np
	import matplotlib.pyplot as plt

	# use matplotlib style sheet
	try:
	plt.style.use('ggplot')
	except:
	# version of matplotlib might not be recent
	pass


	class likelihood:
	def __init__(self, data):
	"""Likelihood for binary data."""
	self.counts = {s:0 for s in ['0', '1']}
	self._process_data(data)

	def _process_data(self, data):
	"""Process data."""
	temp = [str(x) for x in data]
	for s in ['0', '1']:
	self.counts[s] = temp.count(s)

	if len(temp) != sum(self.counts.values()):
	raise Exception("Passed data is not all 0`s and 1`s!")

	def _process_probabilities(self, p0):
	"""Process probabilities."""
	n0 = self.counts['0']
	n1 = self.counts['1']

	if p0 != 0 and p0 != 1:
	# typical case
	logpr_data = n0*np.log(p0) + \
	n1*np.log(1.-p0)
	pr_data = np.exp(logpr_data)
	elif p0 == 0 and n0 != 0:
	# p0 can't be 0 if n0 is not 0
	logpr_data = -np.inf
	pr_data = np.exp(logpr_data)
	elif p0 == 0 and n0 == 0:
	# data consistent with p0=0
	logpr_data = n1*np.log(1.-p0)
	pr_data = np.exp(logpr_data)
	elif p0 == 1 and n1 != 0:
	# p0 can't be 1 if n1 is not 0
	logpr_data = -np.inf
	pr_data = np.exp(logpr_data)
	elif p0 == 1 and n1 == 0:
	# data consistent with p0=1
	logpr_data = n0*np.log(p0)
	pr_data = np.exp(logpr_data)

	return pr_data, logpr_data

	def prob(self, p0):
	"""Get probability of data."""
	pr_data, _ = self._process_probabilities(p0)

	return pr_data

	def log_prob(self, p0):
	"""Get log of probability of data."""
	_, logpr_data = self._process_probabilities(p0)

	return logpr_data


	class prior:
	def __init__(self, p_list, p_probs=None):
	"""The prior.

	p_list: list of allowed p0's
	p_probs: [optional] dict of prior probabilities
	default is uniform
	"""
	if p_probs:
	# make sure prior is normalized
	norm = sum(p_probs.values())
	self.log_pdict = {p:np.log(p_probs[p]) - \
	np.log(norm) for p in p_list}
	else:
	n = len(p_list)
	self.log_pdict = {p:-np.log(n) for p in p_list}

	def __iter__(self):
	return iter(sorted(self.log_pdict))

	def log_prob(self, p):
	"""Get log prior probability for passed p0."""
	if p in self.log_pdict:
	return self.log_pdict[p]
	else:
	return -np.inf

	def prob(self, p):
	"""Get prior probability for passed p0."""
	if p in self.log_pdict:
	return np.exp(self.log_pdict[p])
	else:
	return 0.0


	class posterior:
	def __init__(self, data, prior):
	"""The posterior.

	data: a data sample as list
	prior: an instance of the prior class
	"""
	self.likelihood = likelihood(data)
	self.prior = prior

	self._process_posterior()

	def _process_posterior(self):
	"""Process the posterior using passed data and prior."""

	numerators = {}
	denominator = -np.inf
	for p in self.prior:
	numerators[p] = self.likelihood.log_prob(p) + \
	self.prior.log_prob(p)

	if numerators[p] != -np.inf:
	# np.logaddexp(-np.inf, -np.inf) issues warning
	# skip-- this is adding 0 + 0
	denominator = np.logaddexp(denominator,
	numerators[p])

	# save denominator in Bayes' Theorem
	self.log_marg_likelihood = denominator

	# calculate posterior
	self.log_pdict = {}
	for p in self.prior:
	self.log_pdict[p] = numerators[p] - \
	self.log_marg_likelihood

	def log_prob(self, p):
	"""Get log posterior probability for passed p."""
	if p in self.log_pdict:
	return self.log_pdict[p]
	else:
	return -np.inf

	def prob(self, p):
	"""Get posterior probability for passed p."""
	if p in self.log_pdict:
	return np.exp(self.log_pdict[p])
	else:
	return 0.0

	def plot(self):
	"""Plot the inference resuults."""

	f, ax= plt.subplots(3, 1, figsize=(8, 6), sharex=True)

	# get candidate probabilities from prior
	x = [p for p in self.prior]

	# plot prior
	y1 = np.array([self.prior.prob(p) for p in x])
	ax[0].stem(x, y1, linefmt='r-', markerfmt='ro', basefmt='w-')
	ax[0].set_ylabel("Prior", fontsize=14)
	ax[0].set_xlim(-0.05, 1.05)
	ax[0].set_ylim(0., 1.05*np.max(y1))

	# plot likelihood
	y2 = np.array([self.likelihood.prob(p) for p in x])
	ax[1].stem(x, y2, linefmt='k-', markerfmt='ko', basefmt='w-')
	ax[1].set_ylabel("Likelihood", fontsize=14)
	ax[1].set_xlim(-0.05, 1.05)
	ax[1].set_ylim(0., 1.05*np.max(y2))

	# plot posterior
	y3 = np.array([self.prob(p) for p in x])
	ax[2].stem(x, y3, linefmt='b-', markerfmt='bo', basefmt='w-')
	ax[2].set_ylabel("Posterior", fontsize=14)
	ax[2].set_xlabel("Probability of Zero", fontsize=14)
	ax[2].set_xlim(-0.05, 1.05)
	ax[2].set_ylim(0., 1.05*np.max(y3))

	plt.tight_layout()
	plt.show()

	if __name__ == '__main__':
	# data
	data1 = [0,0,0,0,1,1,0,0,0,1]

	# prior
	A1 = prior([0.2, 0.4, 0.6, 0.8])

	# posterior
	post1 = posterior(data1, A1)
	post1.plot()

	# prior -- will be normalized by class
	A2 = prior([0.2, 0.4, 0.6, 0.8],
	{0.2:1, 0.4:20, 0.6:1, 0.8:1})

	# posterior
	post2 = posterior(data1, A2)
	post2.plot()

	# set probability of 0
	p0 = 0.23
	# set rng seed to 42
	np.random.seed(42)
	# generate data
	data2 = np.random.choice([0,1], 500, p=[p0, 1.-p0])

	# prior
	A3 = prior(np.arange(0.0, 1.01, 0.01))

	# posterior
	post3 = posterior(data2, A3)
	post3.plot()