Skip to content

Instantly share code, notes, and snippets.

View pierrelouisbescond's full-sized avatar

Pierre-Louis BESCOND pierrelouisbescond

View GitHub Profile
# Collect the fitted model's feature-importance scores into a one-column
# DataFrame indexed by feature name, ranked from most to least important.
df_feature_importances = (
    pd.DataFrame(
        RFR.feature_importances_,
        columns=["Importance"],
        index=col_names,
    )
    .sort_values("Importance", ascending=False)
)
df_feature_importances
# Fit a Random Forest regressor; its feature_importances_ attribute is used
# elsewhere in this file to rank the input features.
from sklearn.ensemble import RandomForestRegressor
# Default hyper-parameters; NOTE(review): no random_state is set, so
# importances/predictions are not reproducible run-to-run — confirm intended.
RFR = RandomForestRegressor()
# Assumes X (features) and y (target) are defined earlier — presumably from
# make_regression, which is imported elsewhere in this file. TODO confirm.
RFR.fit(X, y)
# Value of Y we want candidate individuals to reach.
target = 42
# Predict Y for every individual, then score each one by its absolute
# distance from the target value.
population["Y"] = RFR.predict(population)
target_vector = np.ones(population_size) * target
population["target_distance"] = abs(population["Y"] - target_vector)
population
# Imports for the genetic-optimisation demo.
import pandas as pd
import numpy as np
import time
from sklearn.datasets import make_regression
# Synthetic-dataset parameters: 1000 samples, 10 features, of which only
# 3 are informative (actually drive the target) per make_regression.
n_samples = 1000
n_features = 10
n_informative = 3
def generate_min_max_population(df, constraints, generation_size):
    """Create a new random population bounded by df's per-feature min/max.

    NOTE(review): this snippet appears truncated — the zero-filled frame is
    never populated or returned in the visible code; presumably random values
    within [min, max] (honouring `constraints`) are drawn further down.
    TODO confirm against the original gist.
    """
    # The names, min-max and number of features are extracted from the DataFrame
    features_nb = df.shape[1]
    features_names = df.columns
    # Per-feature lower/upper bounds for the generated individuals.
    df_min_max = df.describe().loc[["min","max"],:]
    # We initialize the new population DataFrame with zeros
    new_population = pd.DataFrame(np.zeros((generation_size,features_nb)), columns=features_names)
def min_max_select(constraints, population_in, features_names, generation_size, population_out_size, target, model):
    """Breed a new generation from `population_in` and merge the parents in.

    NOTE(review): this snippet appears truncated — the visible code stops
    after merging; scoring against `target` with `model` and keeping the best
    `population_out_size` individuals presumably follow. TODO confirm.
    """
    # We create a new generation, based on the input population characteristics
    new_generation = generate_min_max_population(population_in, constraints, generation_size)
    # We append the original population to the new generation to keep the best
    # individuals of these two DataFrames.
    # Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent.
    new_generation = pd.concat([new_generation, population_in], ignore_index=True)
    # We calculate Y thanks to the model and the distance from target
# Per-feature pinned values: X1 is forced to -1 and X3 to 4.
constraints = pd.DataFrame(
    {
        "constrained_feature": ["X1", "X3"],
        "constrained_feature_value": [-1, 4],
    }
).set_index("constrained_feature")
# Generation sizing: how many candidates are bred per generation and how
# many survivors are kept.
generation_size = 100
population_out_size = 10
# Seed the search with a first population drawn from the original dataset's
# feature ranges; the target column "Y" is excluded from the features.
features_only = df.drop("Y", axis=1)
starting_population = generate_min_max_population(features_only, constraints, generation_size)
features_names = starting_population.columns
def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
    """Grow a generation by replicating `population_in` up to `generation_size`.

    NOTE(review): this snippet appears truncated — only the replication step
    is visible; the mutation implied by `std_dev_factor`, the bounds from
    `universe_constraints`, scoring against `target` with `model`, and the
    selection of `population_out_size` survivors presumably follow.
    Also: the `features_names` parameter is immediately shadowed below.
    """
    # The names, min-max and number of features are extracted from the DataFrame
    features_names = population_in.columns
    features_nb = population_in.shape[1]
    # Integer division: if generation_size < len(population_in) this is 0 and
    # pd.concat below would receive an empty list (which raises) — TODO
    # confirm callers guarantee generation_size >= population size.
    replication_factor = generation_size // population_in.shape[0]
    # We replicate the population_in according to the replication factor
    new_generation = pd.concat([population_in]*replication_factor, ignore_index=True)
# We might use this array to set absolute boundaries
# (per-feature min/max/std taken from the original dataset).
universe_constraints = df.describe().loc[["min","max","std"],:]
# We might use this array to set values on specific features
# (pins feature X1 to -1 and X3 to 4).
constraints = pd.DataFrame({'constrained_feature': ["X1", "X3"], 'constrained_feature_value': [-1, 4]}).set_index("constrained_feature")
# We define the number of individuals at each generation and the selected number
generation_size = 100
population_out_size = 10
# We store the describe() results inside a dataframe
df_describe = df.describe()
# display() is the IPython/Jupyter rich-output helper — this is notebook code.
display(df_describe)
# We define the parameters of the virtual population we generate
population_size = 1000
# All columns except the last one (presumably the target "Y" — TODO confirm)
# are treated as input features.
features_names = df.columns[:-1]
n_features = len(features_names)
# As an example, we assign a constant value for the third most important characteristic