# pierrelouisbescond/virtual_population_generation_1.py

## virtual_population_generation_1.py
# Summary statistics of the source data; its "min" and "max" rows bound every draw below
df_describe = df.describe()
display(df_describe)

# Shape of the virtual population: one column per feature, where the features are
# all the columns of df except the last one
population_size = 1000
features_names = df.columns[:-1]
n_features = len(features_names)

# As an example, one feature is frozen to a single value: the third label of
# df_feature_importances' index (presumably sorted by decreasing importance - confirm).
# Its value is drawn uniformly between the observed min and max, rounded to 3 decimals.
constraint_feature = df_feature_importances.index[2]
constraint_feature_value = round(
    np.random.uniform(
        df_describe.loc["min", constraint_feature],
        df_describe.loc["max", constraint_feature],
    ),
    3,
)
print("\n", constraint_feature_value, "is assigned to", constraint_feature,"\n")


# Start from a zero-filled frame, then overwrite it column by column:
# the constrained feature gets its constant, every other feature gets uniform draws
population = pd.DataFrame(
    np.zeros((population_size, n_features)),
    columns=features_names,
)

for column_name in features_names:
    if column_name == constraint_feature:
        population[column_name] = np.full(population_size, constraint_feature_value)
        continue
    population[column_name] = np.random.uniform(
        df_describe.loc["min", column_name],
        df_describe.loc["max", column_name],
        population_size,
    )

display(population)
	# We store the describe() results inside a dataframe
# NOTE(review): this section repeats the generation steps that appear earlier in this
# file; running both redraws the constraint value and the whole population - confirm
# whether the duplicate is intentional.
# The statements below are placed at top level with properly nested loop/branch bodies;
# the previous flattened, tab-shifted layout could not be parsed by Python.
df_describe = df.describe()
display(df_describe)

# We define the parameters of the virtual population we generate:
# the features are all the columns of df except the last one
population_size = 1000
features_names = df.columns[:-1]
n_features = len(features_names)

# As an example, we assign a constant value to the third label of
# df_feature_importances' index (presumably the third most important feature - confirm).
# The value is drawn uniformly within the feature's observed [min, max], rounded to 3 decimals.
constraint_feature = df_feature_importances.index[2]
constraint_feature_value = round(
    np.random.uniform(
        df_describe.loc["min", constraint_feature],
        df_describe.loc["max", constraint_feature],
    ),
    3,
)
print("\n", constraint_feature_value, "is assigned to", constraint_feature,"\n")


# For each feature, we create a randomized array within its observed range,
# except for the constrained one where the value is unique
population = pd.DataFrame(
    np.zeros((population_size, n_features)),
    columns=features_names,
)

for column_name in features_names:
    if column_name != constraint_feature:
        population[column_name] = np.random.uniform(
            df_describe.loc["min", column_name],
            df_describe.loc["max", column_name],
            population_size,
        )
    else:
        # Constant column: every virtual individual shares the same value
        population[column_name] = np.full(population_size, constraint_feature_value)

display(population)