sezemiadmin/boxplot.py

## boxplot.py
# Box plot: compare sepal length across the three iris species.
data = [setosa.SepalLength, versicolor.SepalLength, virginica.SepalLength]
plt.boxplot(data)  # one box per entry of `data`, in list order
plt.xlabel("Class")  # x-axis title
plt.ylabel("SepalLength")  # y-axis title
# Name each box after its species. The property to set is `xticklabels`:
# passing the list as `xlabel` would overwrite the "Class" axis title
# instead of labelling the individual boxes.
plt.setp(plt.gca(), xticklabels=["setosa", "versicolor", "virginica"])
plt.show()

## class_iris.py
# Split the data set into one subset per species, selected by the "Class" column.
setosa, versicolor, virginica = (
    iris[iris["Class"] == species_name]
    for species_name in ("Iris-setosa", "Iris-versicolor", "Iris-virginica")
)

## corrcoef.py
# Correlation-coefficient matrix of sepal length vs. sepal width (setosa only).
np.corrcoef(setosa["SepalLength"], setosa["SepalWidth"])

## corrcoef_output.log
array([[ 1.        ,  0.74678037],
       [ 0.74678037,  1.        ]])

## describe.py
# Summary statistics of the setosa subset. These are bare expressions, so
# their values are only shown in an interactive session.
setosa.sum()  # column totals
setosa.min()  # column minima
setosa.max()  # column maxima
# Restrict the mean to numeric columns: the subset still carries the text
# "Class" column, and averaging text raises TypeError on pandas >= 2.0.
setosa.mean(numeric_only=True)  # column means

## hist.py
import matplotlib.pyplot as plt

# Histogram of sepal length for the setosa subset.
plt.hist(setosa["SepalLength"])
# Axis titles: x is the measured variable, y is the bin count
# ("Freq" is short for frequency).
plt.setp(plt.gca(), xlabel="SepalLength", ylabel="Freq")
plt.show()  # render the figure

## pivot_table.py
import numpy as np
# Per-species mean of each measurement column. The aggregation is named by
# the string "mean" rather than passed as np.mean: recent pandas versions
# emit a FutureWarning for the NumPy callable, and the string selects the
# same mean on every pandas version.
pd.pivot_table(iris, index="Class", aggfunc="mean")

## pivot_table_output.log
Class           PetalLength PetalWidth  SepalLength       SepalWidth
Iris-setosa     1.464	      0.244	      5.006	      3.418
Iris-versicolor	4.260	      1.326	      5.936	      2.770
Iris-virginica	5.552	      2.026	      6.588	      2.974

## predict.py
import sklearn.linear_model as lm

# Single-column frames: predictor (sepal length) and response (sepal width).
x = setosa[["SepalLength"]]
y = setosa[["SepalWidth"]]
ir = lm.LinearRegression()  # linear regression model
ir.fit(x, y)  # fit the model to the observed data

# Sepal lengths in steps of 0.01 across the observed range, shaped as a
# single column for predict(). x.min()/x.max() on a one-column frame return
# one-element Series, so the bounds are read from the underlying array to
# hand np.arange plain scalars instead of relying on implicit conversion.
px = np.arange(x.values.min(), x.values.max(), .01)[:, np.newaxis]

# Predicted sepal width for each generated sepal length.
py = ir.predict(px)

plt.plot(px, py, color='blue', linewidth=3)  # fitted line
plt.scatter(x, y, color='red')  # observed points
plt.show()

## predict_output.log
ir.coef_
array([[ 0.80723367]])

ir.intercept_
array([-0.62301173])
	# Box plot: compare sepal length across the three iris species.
	data = [setosa.SepalLength, versicolor.SepalLength, virginica.SepalLength]
	plt.boxplot(data)  # one box per entry of `data`, in list order
	plt.xlabel("Class")  # x-axis title
	plt.ylabel("SepalLength")  # y-axis title
	# Name each box after its species. The property to set is `xticklabels`:
	# passing the list as `xlabel` would overwrite the "Class" axis title
	# instead of labelling the individual boxes.
	plt.setp(plt.gca(), xticklabels=["setosa", "versicolor", "virginica"])
	plt.show()
	# Split the data set into one subset per species, selected by the "Class" column.
	setosa, versicolor, virginica = (
		iris[iris["Class"] == species_name]
		for species_name in ("Iris-setosa", "Iris-versicolor", "Iris-virginica")
	)
	# Summary statistics of the setosa subset. These are bare expressions, so
	# their values are only shown in an interactive session.
	setosa.sum()  # column totals
	setosa.min()  # column minima
	setosa.max()  # column maxima
	# Restrict the mean to numeric columns: the subset still carries the text
	# "Class" column, and averaging text raises TypeError on pandas >= 2.0.
	setosa.mean(numeric_only=True)  # column means
	import matplotlib.pyplot as plt

	# Histogram of sepal length for the setosa subset.
	plt.hist(setosa["SepalLength"])
	# Axis titles: x is the measured variable, y is the bin count
	# ("Freq" is short for frequency).
	plt.setp(plt.gca(), xlabel="SepalLength", ylabel="Freq")
	plt.show()  # render the figure
	import numpy as np
	# Per-species mean of each measurement column. The aggregation is named by
	# the string "mean" rather than passed as np.mean: recent pandas versions
	# emit a FutureWarning for the NumPy callable, and the string selects the
	# same mean on every pandas version.
	pd.pivot_table(iris, index="Class", aggfunc="mean")
	Class PetalLength PetalWidth SepalLength SepalWidth
	Iris-setosa 1.464 0.244 5.006 3.418
	Iris-versicolor 4.260 1.326 5.936 2.770
	Iris-virginica 5.552 2.026 6.588 2.974
	import sklearn.linear_model as lm

	# Single-column frames: predictor (sepal length) and response (sepal width).
	x = setosa[["SepalLength"]]
	y = setosa[["SepalWidth"]]
	ir = lm.LinearRegression()  # linear regression model
	ir.fit(x, y)  # fit the model to the observed data

	# Sepal lengths in steps of 0.01 across the observed range, shaped as a
	# single column for predict(). x.min()/x.max() on a one-column frame return
	# one-element Series, so the bounds are read from the underlying array to
	# hand np.arange plain scalars instead of relying on implicit conversion.
	px = np.arange(x.values.min(), x.values.max(), .01)[:, np.newaxis]

	# Predicted sepal width for each generated sepal length.
	py = ir.predict(px)

	plt.plot(px, py, color='blue', linewidth=3)  # fitted line
	plt.scatter(x, y, color='red')  # observed points
	plt.show()
	ir.coef_
	array([[ 0.80723367]])

	ir.intercept_
	array([-0.62301173])