# unaoya/NormalSample.py

Created January 16, 2019 00:15
 # Sampling distributions of the sample mean and sample variance of normal data.
import numpy as np
import matplotlib.pyplot as plt

mu = 0  # population mean
sigma = 1  # population standard deviation
N = int(1e+5)  # number of independent samples simulated per sample size
SAMPLE_SIZES = [5, 10, 20, 50]


def simulate_statistic(statistic, size):
    """Evaluate `statistic` on each of N simulated normal samples.

    Args:
        statistic: Reducer called as ``statistic(array, axis=1)``, for
            example ``np.mean`` or ``np.var``.
        size: Number of observations in every simulated sample.

    Returns:
        A 1-D array of length N holding one statistic value per sample.
        The mean and standard deviation of those values are also printed.
    """
    samples = np.random.normal(size=(N, size), loc=mu, scale=sigma)
    values = statistic(samples, axis=1)
    print('n=%d, mean=%s, sd=%s' % (size, np.mean(values), np.std(values)))
    return values


def plot_overlaid(statistic):
    """Draw the histograms for every sample size on one shared axes.

    Args:
        statistic: Reducer forwarded to `simulate_statistic`.

    Returns:
        The created matplotlib figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for size in SAMPLE_SIZES:
        values = simulate_statistic(statistic, size)
        # Label each histogram so the overlapping distributions can be told apart.
        ax.hist(values, bins=50, density=True, alpha=0.5, label='n=%d' % size)
    ax.legend()
    return fig


def plot_grid(statistic, xlim, ylim):
    """Draw one histogram per sample size in a 2x2 grid with shared limits.

    Args:
        statistic: Reducer forwarded to `simulate_statistic`.
        xlim: ``(low, high)`` x-axis limits applied to every panel.
        ylim: ``(low, high)`` y-axis limits applied to every panel.

    Returns:
        The created matplotlib figure.
    """
    fig = plt.figure()
    for position, size in enumerate(SAMPLE_SIZES, start=1):
        ax = fig.add_subplot(2, 2, position)
        values = simulate_statistic(statistic, size)
        ax.hist(values, bins=50, density=True, alpha=0.5, label='n=%d' % size)
        # Identical limits on every panel make the narrowing with n visible.
        ax.set_xlim(*xlim)
        ax.set_ylim(*ylim)
        ax.legend()
    return fig


def main():
    """Print one small sample's statistics, then plot the four figures."""
    # One small sample, just to show the raw numbers.
    n = 5
    sample = np.random.normal(size=n, loc=mu, scale=sigma)
    print(np.mean(sample))
    # np.var defaults to ddof=0 (divides by n, not n - 1), so the expected
    # value of this statistic is (n - 1) / n * sigma**2 rather than sigma**2.
    print(np.var(sample))
    print(sample)

    # Sample mean: overlaid histograms, then one panel per sample size.
    plot_overlaid(np.mean)
    plot_grid(np.mean, xlim=(-2, 2), ylim=(0, 3))

    # Sample variance: overlaid histograms, then one panel per sample size.
    plot_overlaid(np.var)
    plot_grid(np.var, xlim=(0, 3), ylim=(0, 2.5))

    # Figure.show() does not run a GUI event loop, so in a plain script the
    # windows would disappear at once; pyplot.show() blocks until they close.
    plt.show()


if __name__ == "__main__":
    main()
 # Examine the distributions of the sample mean and the sample variance of
# samples drawn from the standard normal distribution.
library(ggplot2)
library(dplyr)

set.seed(123)

mu <- 0      # population mean
sigma <- 1   # population standard deviation

# One small sample, just to show the raw numbers.
n <- 5
x <- rnorm(n = n, mean = mu, sd = sigma)
x
mean(x)
var(x)  # var() divides by n - 1 (unbiased estimator)

# Draw N samples for each sample size and record the mean and variance of
# every sample.
N <- 1e+5
sample_sizes <- c(5, 10, 20, 50)

# Simulate N samples of size n; returns a data frame with columns
# means, vars and n (the sample size as a factor).
simulate_stats <- function(n) {
  # Preallocate the result vectors instead of growing them inside the loop.
  means <- numeric(N)
  vars <- numeric(N)
  for (i in seq_len(N)) {
    x <- rnorm(n = n, mean = mu, sd = sigma)
    means[i] <- mean(x)
    vars[i] <- var(x)
  }
  data.frame(means, vars, n = as.factor(rep(n, N)))
}

d <- do.call(rbind, lapply(sample_sizes, simulate_stats))

# Sample mean: overlaid histograms.
# bins = 30 is geom_histogram's default, stated explicitly to avoid the
# "Pick better value with `binwidth`" message.
ggplot(data = d, aes(x = means, fill = n)) +
  geom_histogram(position = "identity", alpha = 0.5, bins = 30)

# Sample mean: one panel per sample size.
ggplot(data = d, aes(x = means, fill = n)) +
  geom_histogram(bins = 30) +
  facet_wrap(~ n)

# Mean and standard deviation of the distribution of the sample mean.
# Plain summarise() replaces summarise_all(funs(mean, sd)); funs() is
# deprecated in dplyr. The result columns are still n, mean and sd.
d %>%
  dplyr::group_by(n) %>%
  dplyr::summarise(mean = mean(means), sd = sd(means))

# Sample variance: overlaid histograms.
ggplot(data = d, aes(x = vars, fill = n)) +
  geom_histogram(position = "identity", alpha = 0.5, bins = 30)

# Sample variance: one panel per sample size.
ggplot(data = d, aes(x = vars, fill = n)) +
  geom_histogram(bins = 30) +
  facet_wrap(~ n)

# Mean and standard deviation of the distribution of the sample variance.
d %>%
  dplyr::group_by(n) %>%
  dplyr::summarise(mean = mean(vars), sd = sd(vars))