Skip to content

Instantly share code, notes, and snippets.

View sh16ma's full-sized avatar
🐻‍❄️

sh16ma sh16ma

🐻‍❄️
View GitHub Profile
@sh16ma
sh16ma / heatmaping.py
Last active January 19, 2022 10:15
#🐍 #Python #EDA #相関係数 #ヒートマップ #Correlation_Matrix
'''
seaborn でのヒートマップ描画
'''
# Correlation Matrix
# Prepare the inputs for a correlation heatmap; the plotting call itself is
# not visible in this excerpt.
# Large canvas (30 x 25 inches) for the figure and its single axes.
fig, ax = plt.subplots(figsize=(30, 25))
# Pairwise Pearson correlation of `dt` (presumably a pandas DataFrame defined
# elsewhere - TODO confirm).
mat = dt.corr('pearson')
# Boolean array shaped like `mat`, True on and above the main diagonal.
# NOTE(review): presumably passed as a heatmap mask to hide the redundant
# upper triangle - confirm against the rest of the script.
mask = np.triu(np.ones_like(mat, dtype=bool))
# Diverging palette between hues 230 and 20, returned as a matplotlib colormap.
cmap = sns.diverging_palette(230, 20, as_cmap=True)
@sh16ma
sh16ma / plots.py
Last active January 19, 2022 10:14
#🐍 #Python #EDA #ストライプ図 #バイオリン図 #箱ひげ図
'''
seabornでの描画:ストライプ図、バイオリン図、箱ひげ図
'''
import matplotlib.pyplot as plt
import seaborn as sns
def plots(x, y):
'''
x 特徴量、 y 目的変数
'''
@sh16ma
sh16ma / nan_pick.py
Last active January 19, 2022 10:14
#🐍 #Python #EDA #NaN #ランキング #断捨離
# Missing-value report for all_df: one row per column that contains NaN,
# with the NaN count, its share of all rows in percent, and a keep/discard flag.
nan = pd.DataFrame(all_df.isna().sum(), columns=['NaN_sum'])
nan['feat'] = nan.index
nan['ratio(%)'] = nan['NaN_sum'] / len(all_df) * 100
# Drop fully populated columns and order by NaN count, fewest first
# (pass ascending=False to sort_values for descending order instead).
nan = nan.loc[nan['NaN_sum'] > 0].sort_values(by='NaN_sum')
# Columns with more than 20% missing values are flagged for removal.
nan['Usability'] = np.where(nan['ratio(%)'] > 20, 'Discard', 'Keep')
nan
@sh16ma
sh16ma / prep_visualizations.py
Last active January 19, 2022 10:13
#🐍 #Python #import #visualization #最初に入れとくと便利
# visualization
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import missingno as msg
# データフレームを綺麗に出力する関数
import IPython
def display(*dfs, head=True):
    """Render each given object with IPython's rich display.

    Args:
        *dfs: Objects to show, in the order given. When ``head`` is true
            each one must provide a ``head()`` method.
        head: If true (the default), show only ``df.head()`` for each
            object; otherwise show the object as-is.
    """
    for frame in dfs:
        if head:
            shown = frame.head()
        else:
            shown = frame
        IPython.display.display(shown)
@sh16ma
sh16ma / nan_rank.py
Last active January 19, 2022 10:13
#🐍 #Python #EDA #NaN #ランキング形式
def nan_rank(df, usabilty=20):
"""
df : データフレーム
usabilty : データの割合に応じての足切りライン
"""
nan = df.isnull().sum().reset_index()
nan.columns = ["name", "count"]
nan["ratio"] = (nan["count"] / df.shape[0])*100
nan["usabilty"] = np.where(nan["ratio"] > usabilty, "Discard", "Keep")
nan = nan[nan["count"] > 0].sort_values(by="ratio")
@sh16ma
sh16ma / evaluations_classfier_basic.py
Last active January 10, 2022 23:43
#🐍 #Python #評価指標 #混同行列 #正解率 #適合率 #再現率 #F値 #自作関数
from sklearn.metrics import accuracy_score # 正解率
from sklearn.metrics import precision_score # 適合率
from sklearn.metrics import recall_score # 再現率
from sklearn.metrics import f1_score # F値
def evaluations(y_test, y_predict, average) -> dict: # 辞書型で返す
"""
y_test:
予測したいデータ
y_predict:
@sh16ma
sh16ma / evaluation_classfier_practical.py
Last active January 19, 2022 10:13
#🐍 #Python #評価指標 #二値分類 #混同行列 #confusion_matrix
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
# algorithm
from sklearn.svm import SVC # サポートベクトルマシン
from sklearn.linear_model import LogisticRegression # ロジスティック回帰
@sh16ma
sh16ma / evaluation_regresser-basic.py
Last active January 19, 2022 10:12
#🐍 #Python #評価指標 #回帰
# libraries
import numpy as np
from sklearn.metrics import r2_score # 決定係数(R^2)
from sklearn.metrics import mean_squared_error # 平均二乗誤差(MSE)
from sklearn.metrics import mean_absolute_error # 平均絶対誤差(MAE)
# ------------------------------
#  途中式割愛
# ------------------------------
@sh16ma
sh16ma / perusal.js
Last active February 3, 2022 04:06
#🏷 #GoogleTagManager #コンテンツ効果測定
/*
*
*
*/
function() {
var read_timer_flg = false;
// scroll ratio.
function getScrollAmount() {
var scroll = window.pageYOffset;
@sh16ma
sh16ma / statistics_value.sql
Last active January 19, 2022 10:11
#🔎 #BigQuery #要約統計量
#standardSQL
select
/**********************************************
* Google BigQueryでの要約統計量
**********************************************/
count(passenger_count) as n
, avg(passenger_count) as mean
, stddev(passenger_count) as std
, min(passenger_count) as min