This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Draw a heatmap with seaborn.
'''
# Correlation matrix
fig, ax = plt.subplots(figsize=(30, 25))
# Pairwise Pearson correlation of `dt` (assumes `dt` is a pandas DataFrame
# defined earlier -- TODO confirm).
mat = dt.corr('pearson')
# Boolean mask with the same shape as `mat`: True on and above the diagonal.
# Presumably used later to hide the redundant upper triangle -- verify.
mask = np.triu(np.ones_like(mat, dtype=bool))
# Diverging colormap between hues 230 and 20, returned as a matplotlib
# colormap object rather than a list of colors.
cmap = sns.diverging_palette(230, 20, as_cmap=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
seabornでの描画:ストライプ図、バイオリン図、箱ひげ図 | |
''' | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
def plots(x, y): | |
''' | |
x 特徴量、 y 目的変数 | |
''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarise the missing values (NaN) of each column of `all_df`.
# One row per column, holding the NaN count.
nan = pd.DataFrame(all_df.isna().sum(), columns=['NaN_sum'])
# Keep the column name as a regular field in addition to the index.
nan['feat'] = nan.index
# NaN count as a percentage of the number of rows in `all_df`.
nan['ratio(%)'] = (nan['NaN_sum'] / all_df.shape[0]) * 100
# Only columns that actually contain missing values are of interest.
nan = nan[nan['NaN_sum'] > 0]
# Sorted in ascending order of NaN count (the default);
# pass ascending=False to sort in descending order instead.
nan = nan.sort_values(by=['NaN_sum'])
# Flag columns with more than 20% missing values as candidates to drop.
nan['Usability'] = np.where(nan['ratio(%)'] > 20, 'Discard', 'Keep')
# Bare expression: shows the table when this is the last line of a notebook cell.
nan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# visualization | |
import matplotlib.pyplot as plt | |
import seaborn as sns; sns.set() | |
import missingno as msg | |
# データフレームを綺麗に出力する関数 | |
import IPython | |
def display(*dfs, head=True):
    """Show each given object through ``IPython.display.display``.

    Args:
        *dfs: Objects to show, in order. When ``head`` is true each one
            must provide a ``head()`` method (e.g. a pandas DataFrame).
        head: If true (the default), show only the result of ``df.head()``
            for each object; otherwise show the object itself.

    Returns:
        None. With no positional arguments nothing is displayed.
    """
    for df in dfs:
        IPython.display.display(df.head() if head else df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def nan_rank(df, usabilty=20): | |
""" | |
df : データフレーム | |
usabilty : データの割合に応じての足切りライン | |
""" | |
nan = df.isnull().sum().reset_index() | |
nan.columns = ["name", "count"] | |
nan["ratio"] = (nan["count"] / df.shape[0])*100 | |
nan["usabilty"] = np.where(nan["ratio"] > usabilty, "Discard", "Keep") | |
nan = nan[nan["count"] > 0].sort_values(by="ratio") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import accuracy_score # 正解率 | |
from sklearn.metrics import precision_score # 適合率 | |
from sklearn.metrics import recall_score # 再現率 | |
from sklearn.metrics import f1_score # F値 | |
def evaluations(y_test, y_predict, average) -> dict: # 辞書型で返す | |
""" | |
y_test: | |
予測したいデータ | |
y_predict: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%matplotlib inline | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# preprocessing | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.model_selection import train_test_split | |
from sklearn.pipeline import Pipeline | |
# algorithm | |
from sklearn.svm import SVC # サポートベクトルマシン | |
from sklearn.linear_model import LogisticRegression # ロジスティック回帰 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# libraries | |
import numpy as np | |
from sklearn.metrics import r2_score # 決定係数(R^2) | |
from sklearn.metrics import mean_squared_error # 平均二乗誤差(MSE) | |
from sklearn.metrics import mean_absolute_error # 平均絶対誤差(MAE) | |
# ------------------------------ | |
# 途中式割愛 | |
# ------------------------------ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* | |
* | |
*/ | |
function() { | |
var read_timer_flg = false; | |
// scroll ratio. | |
function getScrollAmount() { | |
var scroll = window.pageYOffset; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL
select | |
/********************************************** | |
* Google BigQueryでの要約統計量 | |
**********************************************/ | |
count(passenger_count) as n | |
, avg(passenger_count) as mean | |
, stddev(passenger_count) as std | |
, min(passenger_count) as min |
OlderNewer