Skip to content

Instantly share code, notes, and snippets.

@ckrapu
Created March 10, 2021 19:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckrapu/b88a90b882595fad1e536619e0c18702 to your computer and use it in GitHub Desktop.
Save ckrapu/b88a90b882595fad1e536619e0c18702 to your computer and use it in GitHub Desktop.
estimate-means-missing-gaussian
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
# Define number of entities
p = 4
# Define number of obs. per entity
n = 6
true_means = np.random.randn(p)*3
x = np.random.randn(n,p) + true_means
col_labels = ['ent_{0}'.format(i) for i in range(p)]
index = np.arange(n)
df = pd.DataFrame(data=x, columns=col_labels, index=index)
df = df.mask(np.random.random(df.shape) < 0.25)
print(df)
with pm.Model() as model:
means = pm.Normal('means', shape=p, sd=5)
sigmas = pm.HalfCauchy('sigmas', shape=p, beta=1.0)
x = pm.Normal('x', mu=means, sigma=sigmas, observed=df)
trace = pm.sample()
for i in range(p):
samples = trace['means'][:, i]
plt.plot([i,i], [samples.mean() - 2*samples.std(), samples.mean() + 2*samples.std()], color='k')
plt.scatter(np.arange(p), [trace['means'][:,i].mean() for i in range(p)],label='Posterior mean estimate', color='k')
plt.scatter(np.arange(p), true_means, label='True value', color='c')
plt.legend()
plt.xlabel('Entity') plt.ylabel('Value');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment