Skip to content

Instantly share code, notes, and snippets.

View pierrelouisbescond's full-sized avatar

Pierre-Louis BESCOND pierrelouisbescond

View GitHub Profile
# -*- coding: utf-8 -*-
# We start with the import of standard ML libraries
import pandas as pd
import numpy as np
import math
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
# Build the feature-importance bar chart: one bar per index entry of
# df_feature_importances, with bar heights read from its "Importance" column.
fig_features_importance = go.Figure()
fig_features_importance.add_trace(
    go.Bar(
        x=df_feature_importances.index,
        y=df_feature_importances["Importance"],
        marker_color='rgb(171, 226, 251)',
    )
)
# title_x=0.5 positions the title at the horizontal midpoint of the figure.
# The bold markup must be closed with </b>; the title previously ended with a
# second opening <b> tag, leaving the markup unbalanced.
fig_features_importance.update_layout(
    title_text='<b>Features Importance of the model</b>',
    title_x=0.5,
)
# The command below can be activated in a standard notebook to display the chart
#fig_features_importance.show()
###############################################################################
# Create the Dash application object; the callback declared further down is
# registered on it through the @app.callback decorator.
app = dash.Dash()
# The page structure will be:
# Features Importance Chart
# <H4> Feature #1 name
# Slider to update Feature #1 value
# <H4> Feature #2 name
# Slider to update Feature #2 value
# The callback function will provide one "Output" in the form of a string (=children)
@app.callback(Output(component_id="prediction_result",component_property="children"),
# The values corresponding to the three sliders are obtained by calling their id and value property
[Input("X1_slider","value"), Input("X2_slider","value"), Input("X3_slider","value")])
# The input variables are set in the same order as the callback Inputs
def update_prediction(X1, X2, X3):
# We create a NumPy array in the form of the original features
# ["Pressure","Viscosity","Particles_size", "Temperature","Inlet_flow", "Rotating_Speed","pH","Color_density"]
# Let's import standard data processing libraries
import pandas as pd
pd.options.display.max_columns = 15
import numpy as np
# Sklearn libraries for Data Generation, Imputation and Modeling
from sklearn.datasets import make_regression
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
# Split the original DataFrame into train and test sets: every column except
# "Y" is a feature, "Y" is the target. random_state is fixed at 22 so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="Y"),
    df["Y"],
    random_state=22,
)
# Models to benchmark, each instantiated with its default arguments
models = [
    Lasso(),
    KNeighborsRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
]
# Class name of every model, in the same order as `models`
model_names = [type(estimator).__name__ for estimator in models]
# We record the original score achieved by each model on the "test" set after
# Incremental steps to perform: the integers 1 through 20 inclusive
steps = list(range(1, 21))
# Imputation methods to compare, in the order they are evaluated
imputation_methods = [
    # Replace missing values with the constant 0
    SimpleImputer(strategy='constant', fill_value=0),
    # Replace missing values using the 'mean' strategy
    SimpleImputer(strategy='mean'),
    # Iterative imputer with its default arguments
    IterativeImputer(),
    # KNN imputer with its default arguments
    KNNImputer(),
]
# For every model, express each of its result columns as the gap between the
# model's original score and the score recorded in that column
# (original score minus recorded score).
results_difference_from_initial = results.copy()
for model_name in model_names:
    # Columns belonging to this model are those whose label contains its name
    for col in results_difference_from_initial.filter(like=model_name).columns:
        reference_score = initial_scores.loc[model_name, "Score"]
        results_difference_from_initial[col] = (
            reference_score - results_difference_from_initial[col]
        )
# Bare expression: shows the resulting frame when run as the last line of a notebook cell
results_difference_from_initial
import numpy as np
import pandas as pd
import math
import plotly.graph_objects as go
# Number of points to generate
size = 500
# Build three correlated dimensions: "x" is drawn from a normal distribution
# (mean 0, standard deviation 5); "y" and "z" add independent normal noise
# (mean 0, standard deviation 10) to x and to 2*x respectively.
df = pd.DataFrame(np.random.normal(0, 5, size), columns=["x"])
df["y"] = np.random.normal(0, 10, size) + df["x"]
df["z"] = np.random.normal(0, 10, size) + df["x"] * 2
# Axis bounds; not used in these lines — presumably consumed by the layout
# configuration that follows (TODO confirm).
min_axis, max_axis = -50, 50
fig = go.Figure()
# Plot the whole dataset. Column labels are case-sensitive: the frame was
# created with "x", so the lookup must use "x" ("X" raises a KeyError).
fig.add_trace(
    go.Scatter3d(x=df["x"], y=df["y"], z=df["z"], mode='markers', name="dataset")
)
# Add a virtual 2D plane defined by four hard-coded points
fig.add_trace(
    go.Scatter3d(
        x=[-14, 0, 4, 15],
        y=[-17, -27, 30, 7],
        z=[-31, -1, 15, 41],
        mode='markers',
        surfaceaxis=1,
        opacity=0.5,
        name="surface",
    )
)
# Use the same marker size for the traces added above
fig.update_traces(marker=dict(size=3))
fig.update_layout(
scene = dict(