Last active
January 23, 2022 14:45
-
-
Save matsuken92/acff9fc19b680531792c to your computer and use it in GitHub Desktop.
Describe and explain Q-Q plot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import sys | |
import matplotlib.pyplot as plt | |
from matplotlib import animation as ani | |
import numpy as np | |
import pandas as pd | |
import scipy.stats as st | |
from scipy.special import ndtri | |
# Data import: read the raw table from disk, then rebuild it as a DataFrame
# whose columns carry the descriptive names used throughout the analysis.
COLUMN_NAMES = ['Walk_min', 'distance', 'Price', 'Type', 'Area', 'Direction', 'Year']
df = pd.read_table('Mansion2.data')
df2 = pd.DataFrame(df.values, columns=COLUMN_NAMES)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Overview of the data set: a price histogram plus the three pairwise
# scatter plots between price, floor area and walking time.
data_size = len(df2)
price = df2['Price']
mins = df2['Walk_min']
area = df2['Area']

plt.figure(figsize=(12, 11))

# Top-left: distribution of the prices.
plt.subplot(221)
plt.hist(price, bins=20)
plt.title("Histogram of House Price")
plt.xlabel("Price")
plt.ylabel("Count")

# Top-right: price against area.
plt.subplot(222)
plt.title("Scatter plot (Price - Area)")
plt.xlim(6000, 20000)
plt.ylabel("Area")
plt.xlabel("Price")
plt.scatter(price, area)

# Bottom-left: price against walking time.
plt.subplot(223)
plt.title("Scatter plot (Price - Walk mins)")
plt.scatter(price, mins)
plt.xlim(6000, 20000)
plt.xlabel("Price")
plt.ylabel("Walk mins")

# Bottom-right: area against walking time.
plt.subplot(224)
plt.title("Scatter plot (Area - Walk mins)")
plt.scatter(area, mins)
plt.xlabel("Area")
plt.ylabel("Walk mins")

plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare the price histogram with a normal density that has the same
# mean and (population) variance as the sample.
mu_p = np.mean(price)
var_p = np.var(price)
sigma_p = np.sqrt(var_p)
xx = np.linspace(min(price), max(price), 300)
x_density = st.norm.pdf(xx, loc=mu_p, scale=sigma_p)

plt.figure(figsize=(8, 6))
plt.hist(price, bins=20)
plt.title("Histogram of Price")
plt.xlabel("Price")

# The histogram shows counts while the curve is a density, so the curve is
# drawn on a second y-axis that shares the same x-axis.
ax = plt.twinx()
ax.plot(xx, x_density, "red", linewidth=2, zorder=300)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare the cumulative price histogram with the cumulative normal
# distribution fitted to the same mean and variance.
x_lo = min(price) - 1000
x_hi = max(price)
xx = np.linspace(x_lo, x_hi, 300)
x_cdensity = st.norm.cdf(xx, loc=mu_p, scale=np.sqrt(var_p))

plt.figure(figsize=(8, 6))
plt.xlim(x_lo, x_hi)
# The cumulative count tops out at the number of observations, so the axis
# limit follows the data size instead of a hard-coded sample count.
plt.ylim(0, data_size)
plt.hist(price, bins=20, cumulative=True, histtype='step')
plt.title("Histogram of Price (Cumulative)")
plt.xlabel("Price")

# Second y-axis running from 0 to 1 for the CDF, aligned with the count axis.
ax = plt.twinx()
ax.set_xlim(x_lo, x_hi)
ax.set_ylim(0, 1)
ax.plot(xx, x_cdensity, "red", linewidth=2, zorder=300)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sorted prices plotted against evenly spaced positions in [0, 1].
# The sorted array is built here so this cell does not depend on a name
# that is only assigned further down the file.
price_ordered = np.sort(price)

plt.figure(figsize=(7, 6))
plt.xlim(0, 1)
plt.ylim(5900, 19500)
plt.title("House Price(sorted)", size=13)
plt.scatter(np.linspace(0, 1, data_size), price_ordered)
plt.grid(True)

# Standard normal cumulative distribution function, sampled at the same
# number of points as the data.
z_grid = np.linspace(-3, 3, data_size)
plt.figure(figsize=(7, 6))
plt.xlim(-3, 3)
plt.ylim(0, 1)
plt.title("Cumulative Norm Dist", size=13)
plt.scatter(z_grid, st.norm.cdf(z_grid))
plt.grid(True)

# Show both figures so that later plotting calls start from a fresh figure.
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Q-Q plot of the prices against the standard normal distribution.
# Start a dedicated figure so the points are not drawn onto whichever
# figure happens to be current.
plt.figure(figsize=(7, 6))

# Sort the prices in ascending order.
price_ordered = np.sort(price)

# Inverse of the standard normal CDF, evaluated at data_size evenly spaced
# probabilities covering [0, 1].
# NOTE: the end points 0 and 1 map to -inf and +inf, so the smallest and
# largest observations have no finite x position on this plot.
inv = ndtri(np.linspace(0, 1, data_size))

plt.title("Q-Q Plot", size=13)
plt.xlabel("Theoretical Quantiles")
plt.ylabel("Price")
plt.ylim(5900, 20000)
plt.xlim(-3, 3)
plt.scatter(inv, price_ordered)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data = price_ordered


def animate(nframe):
    """Draw one frame of the animation that explains how a Q-Q plot is built.

    The current figure is cleared and four panels are redrawn, with one
    observation highlighted in red:

    * top-right (222): the sorted data against evenly spaced probabilities;
    * bottom-left (223): the standard normal CDF, with the highlighted
      probability mapped back to its normal quantile;
    * top-left (221): the resulting Q-Q plot;
    * bottom-right (224): an auxiliary panel with the unit diagonal, the
      highlighted probability and a histogram of the data.

    The function reads the module-level names ``data`` (a sorted sequence of
    observations) and ``num_frame`` (total frame count, used only for the
    progress output written to stdout).

    Args:
        nframe: Zero-based frame number supplied by ``FuncAnimation``.

    Returns:
        An empty list. Every frame is a full redraw after ``plt.clf()``, so
        there are no individual artists to blit; returning an (empty)
        sequence keeps the callback usable whichever ``blit`` setting the
        caller passes to ``FuncAnimation``.
    """
    n_points = len(data)
    sys.stdout.write(str(int(float(nframe) / num_frame * 100)) + "%, ")

    # Frames 0-89 advance two observations per frame, later frames advance
    # one per frame starting at index 180.
    if nframe < 90:
        ind = nframe * 2
    else:
        ind = 90 + nframe
    # Never index past the last observation when the data set is short.
    ind = min(ind, n_points - 1)

    lo, hi = min(data), max(data)
    # Evenly spaced probabilities and their standard normal quantiles; both
    # are derived from the data length so they always line up with ``data``.
    probs = np.linspace(0, 1, n_points)
    quantiles = ndtri(probs)
    prob = probs[ind]
    inv_norm = quantiles[ind]
    value = data[ind]

    plt.clf()

    # Sorted data plotted in ascending order.
    plt.subplot(222)
    plt.xlim(0, 1)
    plt.ylim(lo, hi)
    plt.scatter(probs, data)
    plt.scatter(prob, value, color='red', s=100, zorder=300)
    # Vertical guide spans the data range rather than a fixed price range.
    plt.plot([prob, prob], [lo, hi], "k", linewidth=2)
    plt.plot([0, 1], [value, value], "k--", linewidth=1)
    plt.title("Data(sorted)=%d" % value, size=13)
    plt.grid(True)

    # Standard normal cumulative distribution.
    plt.subplot(223)
    z_grid = np.linspace(-3, 3, n_points)
    plt.xlim(-3, 3)
    plt.ylim(0, 1)
    plt.scatter(z_grid, st.norm.cdf(z_grid))
    plt.scatter(inv_norm, prob, color='red', s=100, zorder=300)
    plt.plot([-3, 3], [prob, prob], "k", linewidth=2)
    plt.plot([inv_norm, inv_norm], [0, 1], "k--", linewidth=1)
    plt.title("Cumulative Norm Dist x=%.3f" % inv_norm, size=13)
    plt.grid(True)

    # Q-Q plot.
    plt.subplot(221)
    plt.title(u"Q-Q Plot (%.3f, %d)" % (inv_norm, value), size=13)
    plt.ylim(lo, hi)
    plt.xlim(-3, 3)
    plt.scatter(quantiles, data)
    plt.scatter(inv_norm, value, color='red', s=100, zorder=300)
    plt.plot([-3, 3], [value, value], "k--", linewidth=1)
    plt.plot([inv_norm, inv_norm], [lo, hi], "k--", linewidth=1)
    plt.grid(True)
    # Diagonal reference line from (-3, min) to (3, max).
    plt.plot([-3, 3], [lo, hi])

    # Auxiliary information panel.
    plt.subplot(224)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.title("(%.3f,%.3f)" % (prob, prob))
    plt.plot([0, 1], [0, 1])
    plt.plot([0, prob], [prob, prob], "k", linewidth=2)
    plt.plot([prob, prob], [prob, 1], "k", linewidth=2)
    plt.hist(data, bins=20)

    # No plt.show() here: the animation writer captures the figure itself.
    return []
# Render the Q-Q plot animation for the sorted prices and save it as a GIF.
num_frame = 98
fig = plt.figure(figsize=(10, 10))
# animate() clears and redraws the whole figure on every frame and returns no
# artists, so blitting must be off (FuncAnimation rejects blit=True unless
# the callback returns a sequence of artists).
anim = ani.FuncAnimation(fig, animate, frames=num_frame, blit=False)
anim.save('Q-Q_plot_House_price.gif', writer='imagemagick', fps=5, dpi=64)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Q-Q plot animations for samples drawn from several known distributions.
# Each entry pairs the output GIF name with a sampler taking the sample size.
lam = 0.1
qq_demos = [
    # Normal distribution
    ('Q-Q_plot_Norm.gif',
     lambda n: np.random.normal(loc=10, scale=3, size=n)),
    # Exponential distribution
    ('Q-Q_plot_Exp_Dist.gif',
     lambda n: np.random.exponential(1. / lam, size=n)),
    # F distribution
    ('Q-Q_plot_F_Dist.gif',
     lambda n: np.random.f(40, 50, n)),
    # Beta distribution 1
    ('Q-Q_plot_Beta_Dist.gif',
     lambda n: np.random.beta(6, 2, n)),
    # Beta distribution 2
    ('Q-Q_plot_Beta_Dist2.gif',
     lambda n: np.random.beta(0.5, 0.5, n)),
]

num_frame = 98
for gif_name, draw_sample in qq_demos:
    # animate() reads the module-level ``data``. The sample size follows
    # data_size because animate() pairs the data with data_size plotting
    # positions, so the two lengths have to agree.
    data = np.sort(draw_sample(data_size))
    plt.hist(data, bins=20)
    plt.show()

    fig = plt.figure(figsize=(10, 10))
    # Every frame is a full redraw, so blitting is disabled.
    anim = ani.FuncAnimation(fig, animate, frames=num_frame, blit=False)
    anim.save(gif_name, writer='imagemagick', fps=5, dpi=64)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment