Skip to content

Instantly share code, notes, and snippets.

# Wrap the test set (features, labels and categorical column indices) in a Pool.
test_pool = Pool(
    X_test,
    label=y_test,
    cat_features=categorical_features_indices,
)
# Request per-object SHAP values from the trained model.
shap_values = model.get_feature_importance(test_pool, type="ShapValues")
# The last column is read as the expected value; it is taken from the first
# row and then stripped off so only the per-feature columns remain.
expected_value = shap_values[0, -1]
shap_values = shap_values[:, :-1]
# Load the SHAP JavaScript so plots can render in the notebook.
shap.initjs()
# Explain the prediction for the test row at position 3.
shap.force_plot(expected_value, shap_values[3, :], X_test.iloc[3, :])
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Template call: "___" is a placeholder to be replaced with one of the
# "type" values listed below.
cb = CatBoostRegressor()
cb.get_feature_importance(type= "___")
"type" possible values:
- PredictionValuesChange
- LossFunctionChange
- FeatureImportance
  Equal to PredictionValuesChange for non-ranking metrics and to LossFunctionChange for ranking metrics
- ShapValues
Calculate SHAP Values for every object
@aswalin
aswalin / catboost
Last active April 14, 2019 03:37
catboost
from catboost import *
# Toy dataset: column 0 holds string categories, columns 1-2 are numeric.
train_data = [["a", 1, 1], ["b", 3, 0], ["a", 3, 1]]
test_data = [["a", 1, 2]]
train_labels = [10, 20, 30]
model = CatBoostRegressor(iterations=10)
# Column 0 contains strings, so it must be declared categorical; without
# cat_features CatBoost tries to parse "a"/"b" as floats and fit() fails.
model.fit(train_data, train_labels, cat_features=[0])
import torch
infersent = torch.load('InferSent/encoder/infersent.allnli.pickle', map_location=lambda storage, loc: storage)
infersent.set_glove_path("InferSent/dataset/GloVe/glove.840B.300d.txt")
infersent.build_vocab(sentences, tokenize=True)
dict_embeddings = {}
for i in range(len(sentences)):
print(i)
from sklearn import metrics
import numpy as np
# Ground truth: 100 positive labels followed by 900 negative labels.
y_true = np.concatenate((np.ones(100), np.zeros(900)))
# First set of scores: the first 5 samples get a score drawn from [0.5, 1),
# the remaining 995 samples get a score drawn from [0, 0.5).
a = np.random.uniform(0.5,1, 5)
b = np.random.uniform(0,0.5, 995)
y_pred1 = np.concatenate((a,b))
# Start of a second set of scores: 90 values drawn from [0.5, 1).
a = np.random.uniform(0.5,1, 90)
@aswalin
aswalin / BLEU Score
Last active June 8, 2022 04:47
Machine Translation Metric - BLEU Score
from nltk.translate.bleu_score import sentence_bleu
# Reference translations: a list holding one tokenized reference sentence.
reference = [
    ["the", "cat", "is", "sitting", "on", "the", "mat"],
]
# Tokenized candidate translation to be scored against the reference.
candidate = ["on", "the", "mat", "is", "a", "cat"]
# Sentence-level BLEU, called without any optional arguments.
score = sentence_bleu(reference, candidate)
print(score)
from nltk.translate.bleu_score import sentence_bleu
reference = [['the', 'cat',"is","sitting","on","the","mat"]]
candidate = ["there",'is',"cat","sitting","cat"]
@aswalin
aswalin / Linear_Regression_Python
Last active November 5, 2021 09:05
Understanding the difference between R_squared and Adjusted R_squared
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
def metrics(m,X,y):
yhat = m.predict(X)
print(yhat)
SS_Residual = sum((y-yhat)**2)
SS_Total = sum((y-np.mean(y))**2)
r_squared = 1 - (float(SS_Residual))/SS_Total
@aswalin
aswalin / CatBoost
Last active November 15, 2022 16:00
import catboost as cb
cat_features_index = [0,1,2,3,4,5,6]
def auc(m, train, test):
    """Return the ROC AUC of model ``m`` on the train and test features.

    The score passed to ``roc_auc_score`` is column 1 of ``m.predict_proba``.
    Labels are not parameters: they are read from the module-level names
    ``y_train`` and ``y_test``, and ``metrics`` must also be in scope.

    Returns:
        A ``(train_auc, test_auc)`` tuple.
    """
    train_scores = m.predict_proba(train)[:, 1]
    train_auc = metrics.roc_auc_score(y_train, train_scores)
    test_scores = m.predict_proba(test)[:, 1]
    test_auc = metrics.roc_auc_score(y_test, test_scores)
    return (train_auc, test_auc)
params = {'depth': [4, 7, 10],
'learning_rate' : [0.03, 0.1, 0.15],
'l2_leaf_reg': [1,4,9],
import pandas as pd, numpy as np, time
from sklearn.model_selection import train_test_split
# Load the flights dataset from flights.csv.
data = pd.read_csv("flights.csv")
# Keep a reproducible 10% random sample of the rows (fixed seed 10).
data = data.sample(frac = 0.1, random_state=10)
# Restrict the frame to the eleven columns listed here.
data = data[["MONTH","DAY","DAY_OF_WEEK","AIRLINE","FLIGHT_NUMBER","DESTINATION_AIRPORT",
"ORIGIN_AIRPORT","AIR_TIME", "DEPARTURE_TIME","DISTANCE","ARRIVAL_DELAY"]]
# Drop, in place, every row that has a missing value in any remaining column.
data.dropna(inplace=True)