Skip to content

Instantly share code, notes, and snippets.

View pierrelouisbescond's full-sized avatar

Pierre-Louis BESCOND pierrelouisbescond

View GitHub Profile
# Collect the fitted model's feature-importance scores into a one-column
# DataFrame indexed by feature name, ranked from most to least important.
df_feature_importances = (
    pd.DataFrame(
        RFR.feature_importances_,
        columns=["Importance"],
        index=col_names,
    )
    .sort_values("Importance", ascending=False)
)
df_feature_importances
# Fit a Random Forest regressor; its feature_importances_ attribute is used
# elsewhere in this file to rank the input features.
from sklearn.ensemble import RandomForestRegressor
# Default hyper-parameters; NOTE(review): no random_state is set, so
# importances/predictions are not reproducible run-to-run — confirm intended.
RFR = RandomForestRegressor()
# Assumes X (features) and y (target) are defined earlier — presumably from
# make_regression, which is imported elsewhere in this file. TODO confirm.
RFR.fit(X, y)
# Value of Y we want candidate individuals to reach.
target = 42
# Predict Y for every individual, then score each one by its absolute
# distance from the target value.
population["Y"] = RFR.predict(population)
target_vector = np.ones(population_size) * target
population["target_distance"] = abs(population["Y"] - target_vector)
population
# Imports for the genetic-optimisation demo.
import pandas as pd
import numpy as np
import time
from sklearn.datasets import make_regression
# Synthetic-dataset parameters: 1000 samples, 10 features, of which only
# 3 are informative (actually drive the target) per make_regression.
n_samples = 1000
n_features = 10
n_informative = 3
def generate_min_max_population(df, constraints, generation_size):
    """Create a new random population bounded by df's per-feature min/max.

    NOTE(review): this snippet appears truncated — the zero-filled frame is
    never populated or returned in the visible code; presumably random values
    within [min, max] (honouring `constraints`) are drawn further down.
    TODO confirm against the original gist.
    """
    # The names, min-max and number of features are extracted from the DataFrame
    features_nb = df.shape[1]
    features_names = df.columns
    # Per-feature lower/upper bounds for the generated individuals.
    df_min_max = df.describe().loc[["min","max"],:]
    # We initialize the new population DataFrame with zeros
    new_population = pd.DataFrame(np.zeros((generation_size,features_nb)), columns=features_names)
def min_max_select(constraints, population_in, features_names, generation_size, population_out_size, target, model):
    """Breed a new generation from `population_in` and merge the parents in.

    NOTE(review): this snippet appears truncated — the visible code stops
    after merging; scoring against `target` with `model` and keeping the best
    `population_out_size` individuals presumably follow. TODO confirm.
    """
    # We create a new generation, based on the input population characteristics
    new_generation = generate_min_max_population(population_in, constraints, generation_size)
    # We append the original population to the new generation to keep the best
    # individuals of these two DataFrames.
    # Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent.
    new_generation = pd.concat([new_generation, population_in], ignore_index=True)
    # We calculate Y thanks to the model and the distance from target
# Per-feature pinned values: X1 is forced to -1 and X3 to 4.
constraints = pd.DataFrame(
    {
        "constrained_feature": ["X1", "X3"],
        "constrained_feature_value": [-1, 4],
    }
).set_index("constrained_feature")
# Generation sizing: how many candidates are bred per generation and how
# many survivors are kept.
generation_size = 100
population_out_size = 10
# Seed the search with a first population drawn from the original dataset's
# feature ranges; the target column "Y" is excluded from the features.
features_only = df.drop("Y", axis=1)
starting_population = generate_min_max_population(features_only, constraints, generation_size)
features_names = starting_population.columns
def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
    """Grow a generation by replicating `population_in` up to `generation_size`.

    NOTE(review): this snippet appears truncated — only the replication step
    is visible; the mutation implied by `std_dev_factor`, the bounds from
    `universe_constraints`, scoring against `target` with `model`, and the
    selection of `population_out_size` survivors presumably follow.
    Also: the `features_names` parameter is immediately shadowed below.
    """
    # The names, min-max and number of features are extracted from the DataFrame
    features_names = population_in.columns
    features_nb = population_in.shape[1]
    # Integer division: if generation_size < len(population_in) this is 0 and
    # pd.concat below would receive an empty list (which raises) — TODO
    # confirm callers guarantee generation_size >= population size.
    replication_factor = generation_size // population_in.shape[0]
    # We replicate the population_in according to the replication factor
    new_generation = pd.concat([population_in]*replication_factor, ignore_index=True)
# We might use this array to set absolute boundaries
# (per-feature min/max/std taken from the original dataset).
universe_constraints = df.describe().loc[["min","max","std"],:]
# We might use this array to set values on specific features
# (pins feature X1 to -1 and X3 to 4).
constraints = pd.DataFrame({'constrained_feature': ["X1", "X3"], 'constrained_feature_value': [-1, 4]}).set_index("constrained_feature")
# We define the number of individuals at each generation and the selected number
generation_size = 100
population_out_size = 10
# We store the describe() results inside a dataframe
df_describe = df.describe()
# display() is the IPython/Jupyter rich-output helper — this is notebook code.
display(df_describe)
# We define the parameters of the virtual population we generate
population_size = 1000
# All columns except the last one (presumably the target "Y" — TODO confirm)
# are treated as input features.
features_names = df.columns[:-1]
n_features = len(features_names)
# As an example, we assign a constant value for the third most important characteristic