Pierre-Louis BESCOND pierrelouisbescond

## create_industrial_use_case_and_model.py
# -*- coding: utf-8 -*-

# We start with the import of standard ML libraries
import pandas as pd
import numpy as np
import math

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

## create_barchart_and_sliders_labels_ranges.py
# Features Importance bar chart: one bar per entry of df_feature_importances'
# index, with the bar height taken from its "Importance" column.
fig_features_importance = go.Figure()
fig_features_importance.add_trace(
    go.Bar(
        x=df_feature_importances.index,
        y=df_feature_importances["Importance"],
        marker_color='rgb(171, 226, 251)',
    )
)
# The bold markup is closed with </b> so the title is well-formed HTML-like text.
fig_features_importance.update_layout(
    title_text='<b>Features Importance of the model</b>',
    title_x=0.5,
)
# The command below can be activated in a standard notebook to display the chart
#fig_features_importance.show()

## layout_html_structure.py
###############################################################################

# Passing __name__ lets Dash resolve the application's root (used, for
# instance, to find its assets) relative to this module.
app = dash.Dash(__name__)

# The page structure will be:
#    Features Importance Chart
#    <H4> Feature #1 name
#    Slider to update Feature #1 value
#    <H4> Feature #2 name
#    Slider to update Feature #2 value

## appcall_back.py
# The callback function will provide one "Output" in the form of a string (=children)
@app.callback(Output(component_id="prediction_result",component_property="children"),
# The values corresponding to the three sliders are obtained by calling their id and value property
              [Input("X1_slider","value"), Input("X2_slider","value"), Input("X3_slider","value")])

# The input variables are set in the same order as the callback Inputs
def update_prediction(X1, X2, X3):

    # We create a NumPy array in the form of the original features
    # ["Pressure","Viscosity","Particles_size", "Temperature","Inlet_flow", "Rotating_Speed","pH","Color_density"]

## abnormal_values_impact_make_regression.py
# Let's import standard data processing libraries
import pandas as pd
pd.options.display.max_columns = 15
import numpy as np

# Sklearn libraries for Data Generation, Imputation and Modeling
from sklearn.datasets import make_regression

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer

## abnormal_values_impact_initial_scores.py
# Split the original DataFrame into train and test datasets.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop("Y", axis=1),  # features: every column except "Y"
    df["Y"],               # target: the "Y" column
    random_state=22,       # fixed seed passed to the splitter
)

# We define the models to benchmark
models = [
    Lasso(),
    KNeighborsRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
]
# Each model's name is its estimator class name (e.g. "Lasso"), listed in
# the same order as `models`.
model_names = [type(model).__name__ for model in models]

# We record the original score achieved by each model on the "test" set after

## abnormal_values_impact_corrupt_impute_and_test.py
# Incremental steps to perform: the integers 1 through 20 inclusive.
steps = list(range(1, 21))

# Imputation methods to compare: a constant fill with 0, a mean fill,
# and the iterative and KNN imputers with their default settings.
imputation_methods = [
    SimpleImputer(strategy="constant", fill_value=0),
    SimpleImputer(strategy="mean"),
    IterativeImputer(),
    KNNImputer(),
]

## abnormal_values_impact_results_and_display.py
# For every model, turn each of its result columns into the gap between the
# model's initial score and the score stored in that column
# (initial score minus recorded score).
results_difference_from_initial = results.copy()

for model_name in model_names:
    # Columns whose label contains the model's name belong to that model.
    matching_columns = results_difference_from_initial.filter(like=model_name).columns
    for column in matching_columns:
        recorded_scores = results_difference_from_initial[column]
        results_difference_from_initial[column] = (
            initial_scores.loc[model_name, "Score"] - recorded_scores
        )

# Bare expression: displays the DataFrame when run as the last line of a notebook cell.
results_difference_from_initial

## pca_create_dataset.py
import numpy as np
import pandas as pd
import math
import plotly.graph_objects as go

size = 500
# Each dimension is drawn from a normal distribution (mean, standard deviation, size).
# "y" and "z" add their own noise on top of "x" (respectively x and 2 * x).
df = pd.DataFrame({"x": np.random.normal(loc=0, scale=5, size=size)})
df["y"] = df["x"] + np.random.normal(loc=0, scale=10, size=size)
df["z"] = 2 * df["x"] + np.random.normal(loc=0, scale=10, size=size)

## pca_3D_space.py
# Axis bounds (presumably consumed by the layout configuration that follows -- confirm).
min_axis, max_axis = -50, 50

fig = go.Figure()
# Let's plot the whole dataset.
# The DataFrame column is created as "x" (lowercase); indexing with "X"
# would raise a KeyError.
fig.add_trace(
    go.Scatter3d(x=df["x"], y=df["y"], z=df["z"], mode='markers', name="dataset")
)
# And add a virtual 2D plane based on some of the dataset dots
fig.add_trace(
    go.Scatter3d(
        x=[-14, 0, 4, 15],
        y=[-17, -27, 30, 7],
        z=[-31, -1, 15, 41],
        mode='markers',
        surfaceaxis=1,
        opacity=0.5,
        name="surface",
    )
)
# Apply the same marker size to every trace added so far.
fig.update_traces(marker=dict(size=3))
fig.update_layout(
    scene = dict(
	# -- coding: utf-8 --

	# We start with the import of standard ML libraries
	import pandas as pd
	import numpy as np
	import math

	from sklearn.datasets import make_regression
	from sklearn.ensemble import RandomForestRegressor
	# Features Importance bar chart: one bar per entry of df_feature_importances'
	# index, with the bar height taken from its "Importance" column.
	fig_features_importance = go.Figure()
	fig_features_importance.add_trace(
		go.Bar(
			x=df_feature_importances.index,
			y=df_feature_importances["Importance"],
			marker_color='rgb(171, 226, 251)',
		)
	)
	# The bold markup is closed with </b> so the title is well-formed HTML-like text.
	fig_features_importance.update_layout(
		title_text='<b>Features Importance of the model</b>',
		title_x=0.5,
	)
	# The command below can be activated in a standard notebook to display the chart
	#fig_features_importance.show()
	###############################################################################

	# Passing __name__ lets Dash resolve the application's root (used, for
	# instance, to find its assets) relative to this module.
	app = dash.Dash(__name__)

	# The page structure will be:
	# Features Importance Chart
	# <H4> Feature #1 name
	# Slider to update Feature #1 value
	# <H4> Feature #2 name
	# Slider to update Feature #2 value
	# The callback function will provide one "Output" in the form of a string (=children)
	@app.callback(Output(component_id="prediction_result",component_property="children"),
	# The values corresponding to the three sliders are obtained by calling their id and value property
	[Input("X1_slider","value"), Input("X2_slider","value"), Input("X3_slider","value")])

	# The input variables are set in the same order as the callback Inputs
	def update_prediction(X1, X2, X3):

	# We create a NumPy array in the form of the original features
	# ["Pressure","Viscosity","Particles_size", "Temperature","Inlet_flow", "Rotating_Speed","pH","Color_density"]
	# Let's import standard data processing libraries
	import pandas as pd
	pd.options.display.max_columns = 15
	import numpy as np

	# Sklearn libraries for Data Generation, Imputation and Modeling
	from sklearn.datasets import make_regression

	from sklearn.experimental import enable_iterative_imputer
	from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
	# Split the original DataFrame into train and test datasets.
	X_train, X_test, y_train, y_test = train_test_split(
		df.drop("Y", axis=1),  # features: every column except "Y"
		df["Y"],               # target: the "Y" column
		random_state=22,       # fixed seed passed to the splitter
	)

	# We define the models to benchmark
	models = [
		Lasso(),
		KNeighborsRegressor(),
		RandomForestRegressor(),
		GradientBoostingRegressor(),
	]
	# Each model's name is its estimator class name (e.g. "Lasso"), listed in
	# the same order as `models`.
	model_names = [type(model).__name__ for model in models]

	# We record the original score achieved by each model on the "test" set after
	# Incremental steps to perform: the integers 1 through 20 inclusive.
	steps = list(range(1, 21))

	# Imputation methods to compare: a constant fill with 0, a mean fill,
	# and the iterative and KNN imputers with their default settings.
	imputation_methods = [
		SimpleImputer(strategy="constant", fill_value=0),
		SimpleImputer(strategy="mean"),
		IterativeImputer(),
		KNNImputer(),
	]
	# For every model, turn each of its result columns into the gap between the
	# model's initial score and the score stored in that column
	# (initial score minus recorded score).
	results_difference_from_initial = results.copy()

	# The loop bodies are nested explicitly: the outer loop walks the models,
	# the inner loop walks the columns whose label contains the model's name.
	for model_name in model_names:
		col_names_tmp = results_difference_from_initial.filter(like=model_name).columns
		for col in col_names_tmp:
			results_difference_from_initial[col] = (
				initial_scores.loc[model_name, "Score"] - results_difference_from_initial[col]
			)

	# Bare expression: displays the DataFrame when run as the last line of a notebook cell.
	results_difference_from_initial
	# Axis bounds (presumably consumed by the layout configuration that follows -- confirm).
	min_axis, max_axis = -50, 50

	fig = go.Figure()
	# Let's plot the whole dataset.
	# The DataFrame column is created as "x" (lowercase); indexing with "X"
	# would raise a KeyError.
	fig.add_trace(
		go.Scatter3d(x=df["x"], y=df["y"], z=df["z"], mode='markers', name="dataset")
	)
	# And add a virtual 2D plane based on some of the dataset dots
	fig.add_trace(
		go.Scatter3d(
			x=[-14, 0, 4, 15],
			y=[-17, -27, 30, 7],
			z=[-31, -1, 15, 41],
			mode='markers',
			surfaceaxis=1,
			opacity=0.5,
			name="surface",
		)
	)
	# Apply the same marker size to every trace added so far.
	fig.update_traces(marker=dict(size=3))
	fig.update_layout(
	scene = dict(