Created
May 19, 2020 04:11
-
-
Save pierrelouisbescond/78d5e6ba3e0aae96cfb5c67b15e5c310 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _perturb_features(candidates, universe_constraints, constraints, std_dev_factor):
    """Randomly perturb every column of *candidates* in place.

    Columns listed in ``constraints.index`` are overwritten with their single
    constrained value. Every other column is shifted by a uniform random
    offset in ``[-std_dev_factor * std, +std_dev_factor * std]`` and then
    clamped to the ``min`` / ``max`` rows of *universe_constraints*.
    """
    row_count = candidates.shape[0]

    for column_name in candidates.columns:
        if column_name in constraints.index:
            # Constrained feature: every individual gets the same fixed value.
            fixed_value = constraints["constrained_feature_value"].loc[column_name]
            candidates[column_name] = np.ones(row_count) * fixed_value
            continue

        feature_min = universe_constraints.loc["min", column_name]
        feature_max = universe_constraints.loc["max", column_name]
        feature_std = universe_constraints.loc["std", column_name]

        # Updated value = original - (factor * std) + (2 * factor * std * U(0, 1))
        perturbed = (
            candidates[column_name]
            - (std_dev_factor * feature_std)
            + 2 * std_dev_factor * feature_std * np.random.random(row_count)
        )

        # If some of the generated values go beyond the original limits, the
        # limits are applied (lower bound first, then upper bound).
        perturbed = perturbed.mask(perturbed < feature_min, feature_min)
        perturbed = perturbed.mask(perturbed > feature_max, feature_max)
        candidates[column_name] = perturbed


def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
    """Breed a new generation around *population_in* and keep the best individuals.

    The input population is replicated, each replica is perturbed by a random
    offset proportional to the feature's standard deviation, the original
    population is added back, and the individuals whose predicted value is
    closest to *target* are returned.

    Args:
        universe_constraints: DataFrame read via ``.loc[row, feature]`` with
            rows ``"min"``, ``"max"`` and ``"std"`` for every unconstrained
            feature column.
        constraints: DataFrame whose index holds the constrained feature
            names and whose ``"constrained_feature_value"`` column holds the
            value to force for each of them.
        population_in: DataFrame of individuals; all of its columns are
            treated as features. It is not modified.
        features_names: Ignored. Kept for backward compatibility; the feature
            names are always taken from ``population_in.columns``.
        generation_size: Approximate number of new individuals to generate;
            the population is replicated
            ``generation_size // len(population_in)`` times.
        std_dev_factor: Half-width of the random perturbation, expressed in
            standard deviations of each feature.
        population_out_size: Maximum number of individuals to return.
        target: Value the model prediction should be as close to as possible.
        model: Object exposing ``predict(DataFrame)`` that returns one value
            per row.

    Returns:
        DataFrame with the feature columns plus ``"Y"`` (model prediction)
        and ``"target_distance"`` (``|Y - target|``), sorted by ascending
        distance and truncated to *population_out_size* rows.
    """
    replication_factor = generation_size // population_in.shape[0]

    if replication_factor > 0:
        # We replicate population_in according to the replication factor.
        candidates = pd.concat([population_in] * replication_factor, ignore_index=True)
        _perturb_features(candidates, universe_constraints, constraints, std_dev_factor)

        # We add the original population to the new generation to keep the
        # best individuals of these two DataFrames. ``DataFrame.append`` was
        # removed in pandas 2.0, so ``pd.concat`` is used instead.
        new_generation = pd.concat([candidates, population_in], ignore_index=True)
    else:
        # generation_size is smaller than the population: nothing new can be
        # generated, so only the original individuals are ranked.
        new_generation = population_in.reset_index(drop=True)

    # We calculate Y thanks to the model and the distance from the target.
    new_generation["Y"] = model.predict(new_generation)
    new_generation["target_distance"] = (new_generation["Y"] - target).abs()

    # We sort individuals according to their distance from the target and
    # keep only the desired number of individuals.
    return new_generation.sort_values(by="target_distance").head(population_out_size)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment