# pierrelouisbescond/std_dev_select.py

## std_dev_select.py
def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
    """Breed a new generation around ``population_in`` and keep the best rows.

    ``population_in`` is replicated ``generation_size // len(population_in)``
    times. In every replicated row each unconstrained feature is shifted by a
    uniform random offset in ``[-std_dev_factor * std, +std_dev_factor * std]``
    and clipped to the feature's ``[min, max]``; each constrained feature is
    overwritten with its fixed value. The unmodified ``population_in`` rows
    are then added back, every candidate is scored with ``model.predict``, and
    the ``population_out_size`` rows closest to ``target`` are returned.

    Args:
        universe_constraints: DataFrame read with
            ``.loc["min" | "max" | "std", feature]`` for every unconstrained
            feature.
        constraints: DataFrame whose index lists the constrained features and
            whose ``"constrained_feature_value"`` column holds the imposed
            values.
        population_in: DataFrame of parent individuals, one column per
            feature. It is not modified.
        features_names: Ignored; the feature names are always taken from
            ``population_in.columns``. Kept for call compatibility.
        generation_size: Approximate number of mutated individuals to create
            (rounded down to a multiple of ``len(population_in)``).
        std_dev_factor: Half-width of the random shift, in standard
            deviations.
        population_out_size: Maximum number of rows to return.
        target: Value the model output should be as close to as possible.
        model: Object exposing ``predict(DataFrame)``.

    Returns:
        DataFrame with the feature columns plus ``"Y"`` (model output) and
        ``"target_distance"`` (``|Y - target|``), sorted by ascending
        ``"target_distance"``. Row labels are the positions in the combined
        (mutated + original) candidate frame.

    Raises:
        ZeroDivisionError: If ``population_in`` has no rows.
    """
    # The feature list always comes from the DataFrame itself; the
    # features_names argument is overridden, as it always has been.
    features_names = population_in.columns
    replication_factor = generation_size // population_in.shape[0]

    if replication_factor > 0:
        # Tile the parents; every copy is then perturbed independently.
        new_generation = pd.concat([population_in] * replication_factor, ignore_index=True)
        n_offspring = new_generation.shape[0]

        for column_name in features_names:
            if column_name in constraints.index:
                # Constrained feature: a single imposed value for all offspring.
                new_generation[column_name] = (
                    np.ones(n_offspring) * constraints["constrained_feature_value"].loc[column_name]
                )
                continue

            feature_min = universe_constraints.loc["min", column_name]
            feature_max = universe_constraints.loc["max", column_name]
            feature_std = universe_constraints.loc["std", column_name]

            # Updated value = original - spread + 2 * spread * U(0, 1),
            # i.e. a uniform shift within +/- spread around the original.
            spread = std_dev_factor * feature_std
            shifted = new_generation[column_name] - spread + 2 * spread * np.random.random(n_offspring)

            # Values pushed beyond the original limits are pulled back to them.
            new_generation[column_name] = shifted.clip(lower=feature_min, upper=feature_max)

        # Add the parents back so the best of both groups can survive.
        # (DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.)
        new_generation = pd.concat([new_generation, population_in], ignore_index=True)
    else:
        # generation_size is smaller than the population: no offspring can be
        # produced (pd.concat would raise on an empty list), so only the
        # parents compete. reset_index returns a new frame, leaving
        # population_in untouched.
        new_generation = population_in.reset_index(drop=True)

    # Score every candidate and measure its distance from the target.
    new_generation["Y"] = model.predict(new_generation)
    new_generation["target_distance"] = abs(new_generation["Y"] - target)

    # Keep only the individuals closest to the target.
    return new_generation.sort_values(by="target_distance").head(population_out_size)
	def std_dev_select(universe_constraints, constraints, population_in, features_names, generation_size, std_dev_factor, population_out_size, target, model):
		"""Breed a new generation around ``population_in`` and keep the best rows.

		NOTE(review): a function with this same name is also defined earlier
		in this file; consider keeping only one copy.

		``population_in`` is replicated ``generation_size // len(population_in)``
		times. In every replicated row each unconstrained feature is shifted by
		a uniform random offset in
		``[-std_dev_factor * std, +std_dev_factor * std]`` and clipped to the
		feature's ``[min, max]``; each constrained feature is overwritten with
		its fixed value. The unmodified ``population_in`` rows are then added
		back, every candidate is scored with ``model.predict``, and the
		``population_out_size`` rows closest to ``target`` are returned.

		Args:
			universe_constraints: DataFrame read with
				``.loc["min" | "max" | "std", feature]`` for every
				unconstrained feature.
			constraints: DataFrame whose index lists the constrained features
				and whose ``"constrained_feature_value"`` column holds the
				imposed values.
			population_in: DataFrame of parent individuals, one column per
				feature. It is not modified.
			features_names: Ignored; the feature names are always taken from
				``population_in.columns``. Kept for call compatibility.
			generation_size: Approximate number of mutated individuals to
				create (rounded down to a multiple of ``len(population_in)``).
			std_dev_factor: Half-width of the random shift, in standard
				deviations.
			population_out_size: Maximum number of rows to return.
			target: Value the model output should be as close to as possible.
			model: Object exposing ``predict(DataFrame)``.

		Returns:
			DataFrame with the feature columns plus ``"Y"`` (model output) and
			``"target_distance"`` (``|Y - target|``), sorted by ascending
			``"target_distance"``.

		Raises:
			ZeroDivisionError: If ``population_in`` has no rows.
		"""
		# The feature list always comes from the DataFrame itself; the
		# features_names argument is overridden.
		features_names = population_in.columns
		replication_factor = generation_size // population_in.shape[0]

		if replication_factor > 0:
			# Tile the parents; every copy is then perturbed independently.
			new_generation = pd.concat([population_in] * replication_factor, ignore_index=True)
			n_offspring = new_generation.shape[0]

			for column_name in features_names:
				if column_name in constraints.index:
					# Constrained feature: a single imposed value for all offspring.
					new_generation[column_name] = (
						np.ones(n_offspring) * constraints["constrained_feature_value"].loc[column_name]
					)
					continue

				feature_min = universe_constraints.loc["min", column_name]
				feature_max = universe_constraints.loc["max", column_name]
				feature_std = universe_constraints.loc["std", column_name]

				# Updated value = original - spread + 2 * spread * U(0, 1),
				# i.e. a uniform shift within +/- spread around the original.
				spread = std_dev_factor * feature_std
				shifted = new_generation[column_name] - spread + 2 * spread * np.random.random(n_offspring)

				# Values pushed beyond the original limits are pulled back to them.
				new_generation[column_name] = shifted.clip(lower=feature_min, upper=feature_max)

			# Add the parents back so the best of both groups can survive.
			# (DataFrame.append was removed in pandas 2.0; pd.concat is the
			# supported equivalent.)
			new_generation = pd.concat([new_generation, population_in], ignore_index=True)
		else:
			# generation_size is smaller than the population: no offspring can
			# be produced (pd.concat would raise on an empty list), so only the
			# parents compete. reset_index returns a new frame, leaving
			# population_in untouched.
			new_generation = population_in.reset_index(drop=True)

		# Score every candidate and measure its distance from the target.
		new_generation["Y"] = model.predict(new_generation)
		new_generation["target_distance"] = abs(new_generation["Y"] - target)

		# Keep only the individuals closest to the target.
		return new_generation.sort_values(by="target_distance").head(population_out_size)