setavenger/Sheet_2.py

## Sheet_2.py

from scipy.stats import poisson
from scipy.stats import binom

from sympy import Matrix, S, linsolve, symbols

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

# from astropy.Table import Table


mus = [0.5, 1, 2, 3, 5, 10, 20]
numbers = [30, 100, 500]


for mu in mus:
    fig, ax = plt.subplots(1, 1)

    x = np.arange(poisson.ppf(0.0001, mu), poisson.ppf(0.9999, mu))

    ax.plot(x, poisson.pmf(x, mu), 'bo', color='r', ms=8, label='poisson pmf')

    ax.vlines(x, 0, poisson.pmf(x, mu), colors='grey', lw=5, alpha=0.5)

    plt.title('Poisson distribution:    pmf for λ = ' + str(mu))

    ax.legend(loc='best', frameon=False)

    plt.xlabel('x')

    plt.ylabel('fx(x)')

    plt.show()

for mu in mus:
    for n in numbers:
        fig, ax = plt.subplots(1, 1)

        # calculate p

        A = Matrix([n])
        b = Matrix([mu])

        prob = symbols('p')

        b = linsolve((A, b), [prob])

        res = next(iter(b))

        # res is the calculated probability for binomial

        p = float(res[0])

        x = np.arange(poisson.ppf(0.0001, mu), poisson.ppf(0.9999, mu))

        ax.plot(x, poisson.pmf(x, mu), 'bo', color='r', ms=8, label='poisson pmf')

        ax.vlines(x, 0, poisson.pmf(x, mu), colors='grey', lw=5, alpha=0.5)

        ax.plot(x, binom.pmf(x, n, p), 'bo', marker='*', ms=8, label='binom pmf')

        # ax.vlines(x, 0, binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)

        plt.title('Poisson λ = ' + str(mu) + ' / BN n = ' + str(n) + ', p = ' + str(round(p, 4)))

        ax.legend(loc='best', frameon=False)

        end = max(list(binom.pmf(x, n, p)) + list(poisson.pmf(x, mu)))

        plt.xlabel('x')

        plt.ylabel('pmf poisson (red) / pmf binomial (blue)')

        plt.show()

print('4c')
print('The larger the n and the lower the corresponding probability the more the Poisson Distribution and '
      'Binomial Distribution align. the effect is espacially visible in the later plots where the probability '
      'is very high. Thus we can say that n is the driving force to the accuracy becaus e the probability is '
      'calculated based on n.')
print()
print()
'''
Exercise 5
'''
print('Exercise 5')

num_fat_acc = [0, 1, 2, 3, 4]
obs_freq = [109, 65, 22, 3, 1]

comb = dict(zip(num_fat_acc, obs_freq))

total_frq = sum(list(comb.values()))
mu = comb

avg = 0
header = ['x', 'absH', 'relH', 'pmf', 'error']
table = []

for i in comb.keys():
    freq = comb[i]
    relh = freq/total_frq

    avg += (i * relh)


for i in comb.keys():
    values = []

    freq = comb[i]
    relh = freq / total_frq

    pmf = poisson.pmf(i, avg)
    error = relh - pmf

    values.append(i)
    values.append(freq)
    values.append(round(relh, 4))
    values.append(round(pmf, 4))
    values.append(round(error, 7))
    table.append(values)

df = pd.DataFrame(data=table, columns=header)
print('mean of fatal accidents', round(avg, 4))

fig, ax = plt.subplots(1, 1)

ax.plot(df['x'], df['pmf'], 'bo', color='r', ms=8, label='poisson pmf')

ax.vlines(df['x'], 0, df['pmf'], colors='grey', lw=5, alpha=0.5)

ax.plot(df['x'], df['relH'], 'bo', marker='*', ms=8, label='binom pmf')

plt.xlabel('x')

plt.ylabel('rel. freq. (blue) / pmf (red)')

plt.title('Comparison of rel. freq. (blue) and pmf (red)')

plt.show()

df = df.set_index('x')
print(df)

	from scipy.stats import poisson
	from scipy.stats import binom

	from sympy import Matrix, S, linsolve, symbols

	import numpy as np
	import matplotlib.pyplot as plt

	import pandas as pd

	# from astropy.Table import Table


	mus = [0.5, 1, 2, 3, 5, 10, 20]
	numbers = [30, 100, 500]


	for mu in mus:
	fig, ax = plt.subplots(1, 1)

	x = np.arange(poisson.ppf(0.0001, mu), poisson.ppf(0.9999, mu))

	ax.plot(x, poisson.pmf(x, mu), 'bo', color='r', ms=8, label='poisson pmf')

	ax.vlines(x, 0, poisson.pmf(x, mu), colors='grey', lw=5, alpha=0.5)

	plt.title('Poisson distribution: pmf for λ = ' + str(mu))

	ax.legend(loc='best', frameon=False)

	plt.xlabel('x')

	plt.ylabel('fx(x)')

	plt.show()

	for mu in mus:
	for n in numbers:
	fig, ax = plt.subplots(1, 1)

	# calculate p

	A = Matrix([n])
	b = Matrix([mu])

	prob = symbols('p')

	b = linsolve((A, b), [prob])

	res = next(iter(b))

	# res is the calculated probability for binomial

	p = float(res[0])

	x = np.arange(poisson.ppf(0.0001, mu), poisson.ppf(0.9999, mu))

	ax.plot(x, poisson.pmf(x, mu), 'bo', color='r', ms=8, label='poisson pmf')

	ax.vlines(x, 0, poisson.pmf(x, mu), colors='grey', lw=5, alpha=0.5)

	ax.plot(x, binom.pmf(x, n, p), 'bo', marker='*', ms=8, label='binom pmf')

	# ax.vlines(x, 0, binom.pmf(x, n, p), colors='b', lw=5, alpha=0.5)

	plt.title('Poisson λ = ' + str(mu) + ' / BN n = ' + str(n) + ', p = ' + str(round(p, 4)))

	ax.legend(loc='best', frameon=False)

	end = max(list(binom.pmf(x, n, p)) + list(poisson.pmf(x, mu)))

	plt.xlabel('x')

	plt.ylabel('pmf poisson (red) / pmf binomial (blue)')

	plt.show()

	print('4c')
	print('The larger the n and the lower the corresponding probability the more the Poisson Distribution and '
	'Binomial Distribution align. the effect is espacially visible in the later plots where the probability '
	'is very high. Thus we can say that n is the driving force to the accuracy becaus e the probability is '
	'calculated based on n.')
	print()
	print()
	'''
	Exercise 5
	'''
	print('Exercise 5')

	num_fat_acc = [0, 1, 2, 3, 4]
	obs_freq = [109, 65, 22, 3, 1]

	comb = dict(zip(num_fat_acc, obs_freq))

	total_frq = sum(list(comb.values()))
	mu = comb

	avg = 0
	header = ['x', 'absH', 'relH', 'pmf', 'error']
	table = []

	for i in comb.keys():
	freq = comb[i]
	relh = freq/total_frq

	avg += (i * relh)


	for i in comb.keys():
	values = []

	freq = comb[i]
	relh = freq / total_frq

	pmf = poisson.pmf(i, avg)
	error = relh - pmf

	values.append(i)
	values.append(freq)
	values.append(round(relh, 4))
	values.append(round(pmf, 4))
	values.append(round(error, 7))
	table.append(values)

	df = pd.DataFrame(data=table, columns=header)
	print('mean of fatal accidents', round(avg, 4))

	fig, ax = plt.subplots(1, 1)

	ax.plot(df['x'], df['pmf'], 'bo', color='r', ms=8, label='poisson pmf')

	ax.vlines(df['x'], 0, df['pmf'], colors='grey', lw=5, alpha=0.5)

	ax.plot(df['x'], df['relH'], 'bo', marker='*', ms=8, label='binom pmf')

	plt.xlabel('x')

	plt.ylabel('rel. freq. (blue) / pmf (red)')

	plt.title('Comparison of rel. freq. (blue) and pmf (red)')

	plt.show()

	df = df.set_index('x')
	print(df)