Skip to content

Instantly share code, notes, and snippets.

@pierrelouisbescond
Created May 19, 2020 04:11
Show Gist options
  • Save pierrelouisbescond/78d5e6ba3e0aae96cfb5c67b15e5c310 to your computer and use it in GitHub Desktop.
Save pierrelouisbescond/78d5e6ba3e0aae96cfb5c67b15e5c310 to your computer and use it in GitHub Desktop.
def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
    """Breed a new generation around ``population_in`` and keep the best rows.

    The input population is replicated ``generation_size // len(population_in)``
    times. Every unconstrained feature of the replicas is shifted by a uniform
    random offset in ``[-std_dev_factor * std, +std_dev_factor * std]`` and
    then limited to the feature's ``[min, max]`` range. Constrained features
    are overwritten with their fixed value. The original population is added
    back unchanged, every row is scored with ``model.predict``, and the rows
    closest to ``target`` are returned.

    Args:
        universe_constraints: DataFrame read with ``.loc["min" | "max" | "std",
            column]`` for every unconstrained column of ``population_in``.
        constraints: DataFrame whose index lists the constrained feature names
            and whose ``"constrained_feature_value"`` column holds the value
            each of them is pinned to.
        population_in: DataFrame of individuals (one row each); its columns
            define the features. It is not modified.
        features_names: Ignored. Kept for backward compatibility; the feature
            names are always taken from ``population_in.columns``.
        generation_size: Approximate number of perturbed individuals to
            create (rounded down to a multiple of ``len(population_in)``).
        std_dev_factor: Half-width of the random perturbation, expressed in
            multiples of each feature's ``std``.
        population_out_size: Maximum number of individuals to return.
        target: Value the model prediction should be as close to as possible.
        model: Object exposing ``predict(DataFrame)``; the result is stored
            in the ``"Y"`` column.

    Returns:
        DataFrame with the feature columns plus ``"Y"`` (prediction) and
        ``"target_distance"`` (``|Y - target|``), sorted by ascending
        ``"target_distance"`` and truncated to ``population_out_size`` rows.

    Raises:
        ValueError: If ``generation_size`` is smaller than the number of rows
            in ``population_in`` (no replica could be generated).
        ZeroDivisionError: If ``population_in`` has no rows.
    """
    # The feature list always comes from the input population; the
    # ``features_names`` argument is intentionally overwritten.
    features_names = population_in.columns
    replication_factor = generation_size // population_in.shape[0]
    if replication_factor < 1:
        # pd.concat([]) would raise an opaque "No objects to concatenate"
        # ValueError; keep the exception type but explain the cause.
        raise ValueError(
            "generation_size must be at least the number of rows in population_in"
        )

    # We replicate the population_in according to the replication factor.
    # pd.concat builds a new frame, so population_in itself is never mutated.
    new_generation = pd.concat([population_in] * replication_factor, ignore_index=True)
    replicas_nb = new_generation.shape[0]

    for column_name in features_names:
        if column_name in constraints.index:
            # Constrained feature: every replica gets the single imposed value.
            new_generation[column_name] = np.ones(replicas_nb) * constraints["constrained_feature_value"].loc[column_name]
            continue

        feature_min = universe_constraints.loc["min", column_name]
        feature_max = universe_constraints.loc["max", column_name]
        feature_std = universe_constraints.loc["std", column_name]
        spread = std_dev_factor * feature_std
        # Updated value = original - spread + 2 * spread * U(0, 1),
        # i.e. a uniform draw in [original - spread, original + spread].
        new_generation[column_name] = new_generation[column_name] - spread + 2 * spread * np.random.random(replicas_nb)
        # If some of the generated values go beyond the original limits,
        # the limits are applied.
        new_generation.loc[new_generation[column_name] < feature_min, column_name] = feature_min
        new_generation.loc[new_generation[column_name] > feature_max, column_name] = feature_max

    # We add the original population back so that the best individuals of
    # both frames compete. DataFrame.append was removed in pandas 2.0, so
    # pd.concat is used instead.
    new_generation = pd.concat([new_generation, population_in], ignore_index=True)

    # We calculate Y thanks to the model and the distance from target.
    # predict() must run before "Y" is added so it only sees feature columns.
    new_generation["Y"] = model.predict(new_generation)
    new_generation["target_distance"] = abs(new_generation["Y"] - np.ones(new_generation.shape[0]) * target)

    # We sort individuals according to their distance from the target and
    # keep only the desired number of individuals.
    new_generation = new_generation.sort_values(by="target_distance").head(population_out_size)
    return new_generation
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment