Skip to content

Instantly share code, notes, and snippets.

@glemaitre
glemaitre / TreeSHAP_bug.ipynb
Last active March 29, 2022 09:55
TreeSHAP bug reproducer
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# %%
# Download the original dataset to be able to easily build an index with the
# original datetime.
# The dataset is available at:
# https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip
import pandas as pd
df_external = pd.read_csv(
"~/Downloads/Bike-Sharing-Dataset/hour.csv",
index_col=0,
# %%
from sklearn.datasets import fetch_openml
# Fetch the OpenML dataset with id 41082 (presumably the USPS digits
# dataset, going by the variable name -- TODO confirm).
usps = fetch_openml(data_id=41082)
# %%
# Pull the feature matrix and the labels out of the fetched object.
data = usps.data
target = usps.target
# %%
import numpy as np
import pandas as pd
def calcul_chute_tension(
Ib=1, S=1.5, Un=400, L=0.1, metal="cuivre", phi=np.arccos(0.85)
):
Ib = np.asarray(Ib)
S = np.asarray(S)
import pandas as pd
import pytest
def func(expected_columns):
df = pd.DataFrame({
"A": [1, 2, 3],
"B": [1, 2, 3],
"C": [1, 2, 3]
@pytest.mark.parametrize("name, Tree", REG_TREES.items())
@pytest.mark.parametrize("criterion", REG_CRITERIONS)
def test_diabetes_overfit(name, Tree, criterion):
# check consistency of overfitted trees on the diabetes dataset
# since the trees will overfit, we expect an MSE of 0
reg = Tree(criterion=criterion, random_state=0)
reg.fit(diabetes.data, diabetes.target)
score = mean_squared_error(diabetes.target, reg.predict(diabetes.data))
assert score == pytest.approx(0), (
f"Failed with {name}, criterion = {criterion} and score = {score}"
In [1]: import numpy as np
In [2]: X = ["One", "string"]
In [3]: X
Out[3]: ['One', 'string']
In [4]: X[0]
Out[4]: 'One'
# %%
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
RANDOM_SEED = 2
import cv2
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def grab_frame(cap):
    """Read the next frame from ``cap`` and return it converted to RGB.

    Parameters
    ----------
    cap : object
        Capture source whose ``read()`` returns a ``(success, frame)`` pair
        (presumably a ``cv2.VideoCapture`` -- confirm against the caller).

    Returns
    -------
    The frame after ``cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)``.

    Raises
    ------
    RuntimeError
        If ``cap.read()`` reports failure or yields no frame.
    """
    success, frame = cap.read()
    # Check the status flag: on a failed read there is no frame to convert
    # and cvtColor would otherwise fail with an opaque OpenCV error.
    if not success or frame is None:
        raise RuntimeError("Could not read a frame from the capture source")
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
import asyncio
import random
import time
from concurrent.futures import ProcessPoolExecutor
def simulator_submission():
    """Give ``None`` or a submission id.

    Returns
    -------
    int or None
        Either a random integer drawn with ``random.randint(0, 1000)`` or
        ``None``; ``random.choice`` picks between the two.
    """
    # Draw the candidate id first, then pick between it and None, so the
    # sequence of random calls is randint followed by choice.
    candidate_id = random.randint(0, 1000)
    return random.choice([candidate_id, None])