Skip to content

Instantly share code, notes, and snippets.

@unaoya
Created January 16, 2019 00:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unaoya/84e68c6de05f17cc9d557e2695eb2071 to your computer and use it in GitHub Desktop.
Save unaoya/84e68c6de05f17cc9d557e2695eb2071 to your computer and use it in GitHub Desktop.
標準正規分布に従う標本の平均と分散の分布
import numpy as np
import matplotlib.pyplot as plt
# Parameters of the normal population and the size of a single sample.
mu, sigma = 0, 1
n = 5

# Draw one sample and report its mean, its variance and the raw values.
# ndarray.var() / np.var() use ddof=0 by default (divide by n).
sample = np.random.normal(loc=mu, scale=sigma, size=n)
print(sample.mean())
print(sample.var())
print(sample)

# Number of repeated samples drawn per sample size in the sections below.
N = 10 ** 5
# Overlay the histograms of the sample mean for several sample sizes.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for n in [5, 10, 20, 50]:
    # N independent samples of size n, one sample per row.
    sample = np.random.normal(size=(N, n), loc=mu, scale=sigma)
    sample_mean = np.mean(sample, axis=1)
    # Label every histogram so the overlaid distributions can be told apart.
    ax.hist(sample_mean, bins=50, density=True, alpha=0.5, label='n=%d' % n)
    print('n=%d, mean=%s, sd=%s' % (n, np.mean(sample_mean), np.std(sample_mean)))
ax.legend()
# NOTE(review): fig.show() does not block; a plain script may need plt.show() instead.
fig.show()
# Show the histogram of the sample mean in a separate panel per sample size.
fig = plt.figure()
n = [5, 10, 20, 50]
for i, size in enumerate(n):
    ax = fig.add_subplot(2, 2, i + 1)
    # N independent samples of the current size, one sample per row.
    sample = np.random.normal(size=(N, size), loc=mu, scale=sigma)
    sample_mean = np.mean(sample, axis=1)
    # Draw on the axes object explicitly instead of relying on pyplot's
    # implicit "current axes" state.
    ax.hist(sample_mean, bins=50, density=True, alpha=0.5)
    # Identical axis limits in every panel keep the four plots comparable.
    ax.set_xlim(-2, 2)
    ax.set_ylim(0, 3)
    ax.set_title('n=%d' % size)
    print('n=%d, mean=%s, sd=%s' % (size, np.mean(sample_mean), np.std(sample_mean)))
# Keep the panel titles from colliding with the tick labels of the row above.
fig.tight_layout()
# NOTE(review): fig.show() does not block; a plain script may need plt.show() instead.
fig.show()
# Overlay the histograms of the sample variance for several sample sizes.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for n in [5, 10, 20, 50]:
    # N independent samples of size n, one sample per row.
    sample = np.random.normal(size=(N, n), loc=mu, scale=sigma)
    # np.var uses ddof=0 by default, i.e. it divides by n.
    sample_var = np.var(sample, axis=1)
    # Label every histogram so the overlaid distributions can be told apart.
    ax.hist(sample_var, bins=50, density=True, alpha=0.5, label='n=%d' % n)
    print('n=%d, mean=%s, sd=%s' % (n, np.mean(sample_var), np.std(sample_var)))
ax.legend()
# NOTE(review): fig.show() does not block; a plain script may need plt.show() instead.
fig.show()
# Show the histogram of the sample variance in a separate panel per sample size.
fig = plt.figure()
n = [5, 10, 20, 50]
for i, size in enumerate(n):
    ax = fig.add_subplot(2, 2, i + 1)
    # N independent samples of the current size, one sample per row.
    sample = np.random.normal(size=(N, size), loc=mu, scale=sigma)
    # np.var uses ddof=0 by default, i.e. it divides by the sample size.
    sample_var = np.var(sample, axis=1)
    # Draw on the axes object explicitly instead of relying on pyplot's
    # implicit "current axes" state.
    ax.hist(sample_var, bins=50, density=True, alpha=0.5)
    # Identical axis limits in every panel keep the four plots comparable.
    ax.set_xlim(0, 3)
    ax.set_ylim(0, 2.5)
    ax.set_title('n=%d' % size)
    print('n=%d, mean=%s, sd=%s' % (size, np.mean(sample_var), np.std(sample_var)))
# Keep the panel titles from colliding with the tick labels of the row above.
fig.tight_layout()
# NOTE(review): fig.show() does not block; a plain script may need plt.show() instead.
fig.show()
#標準正規分布に従う標本の標本平均と標本分散の分布を調べる
library(ggplot2)
library(dplyr)
# Population parameters and the size of a single sample
n <- 5
mu <- 0
sigma <- 1

# Fix the RNG seed so that the draws below are reproducible
set.seed(123)

# Draw one sample, then show it together with its mean and variance
sample <- rnorm(n, mean = mu, sd = sigma)
sample
mean(sample)
var(sample)
# Repeatedly draw samples and record the sample mean and sample variance.
# For every sample size, N samples are drawn from N(mu, sigma^2); the result
# `d` has one row per sample with columns `means`, `vars` and `n` (the sample
# size as a factor).
N <- 1e+5
sample_sizes <- c(5, 10, 20, 50)

# Simulate N samples of the given size and return their statistics as a
# data frame.
simulate_stats <- function(size) {
  # vapply() fills a preallocated 2 x N matrix (row 1: mean, row 2: variance)
  # instead of growing two vectors element by element.
  stats <- vapply(
    seq_len(N),
    function(i) {
      draws <- rnorm(n = size, mean = mu, sd = sigma)
      c(mean(draws), var(draws))
    },
    numeric(2)
  )
  data.frame(
    means = stats[1, ],
    vars = stats[2, ],
    n = as.factor(rep(size, N))
  )
}

# Bind the per-size frames once at the end rather than calling rbind() on
# every loop iteration.
d <- do.call(rbind, lapply(sample_sizes, simulate_stats))
# Overlay the histograms of the sample mean, one fill colour per sample size
ggplot(d, aes(means, fill = n)) +
  geom_histogram(alpha = 0.5, position = "identity")
# Histogram of the sample mean in a separate panel for each sample size
ggplot(d, aes(means, fill = n)) +
  facet_wrap(~n) +
  geom_histogram()
# Mean and standard deviation of the sample-mean distribution per sample size.
# The summary columns are spelled out with summarise() because funs() has been
# deprecated since dplyr 0.8.0; the output columns are still `n`, `mean`, `sd`.
d %>%
  dplyr::select(means, n) %>%
  dplyr::group_by(n) %>%
  dplyr::summarise(mean = mean(means), sd = sd(means))
# Overlay the histograms of the sample variance, one fill colour per sample size
ggplot(d, aes(vars, fill = n)) +
  geom_histogram(alpha = 0.5, position = "identity")
# Histogram of the sample variance in a separate panel for each sample size
ggplot(d, aes(vars, fill = n)) +
  facet_wrap(~n) +
  geom_histogram()
# Mean and standard deviation of the sample-variance distribution per sample
# size. The summary columns are spelled out with summarise() because funs() has
# been deprecated since dplyr 0.8.0; the output columns are still `n`, `mean`,
# `sd`.
d %>%
  dplyr::select(vars, n) %>%
  dplyr::group_by(n) %>%
  dplyr::summarise(mean = mean(vars), sd = sd(vars))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment