Skip to content

Instantly share code, notes, and snippets.

@VincentGat
VincentGat / data.py
Last active February 25, 2020 15:17
def get_data():
    """Build the synthetic binary classification dataset.

    Calls ``make_classification`` with 50000 samples and 45 features
    (12 informative, 7 redundant) and a fixed ``random_state``.

    Returns:
        The ``(data, targets)`` pair produced by ``make_classification``.
    """
    settings = {
        "n_samples": 50000,
        "n_features": 45,
        "n_informative": 12,
        "n_redundant": 7,
        "random_state": 134985745,
    }
    data, targets = make_classification(**settings)
    return data, targets
@VincentGat
VincentGat / ae.py
Last active February 24, 2020 21:20
from deap.algorithms import eaSimple
# Run DEAP's simple evolutionary algorithm for 50 generations with a 30%
# crossover and 10% mutation probability; `a` keeps whatever eaSimple returns.
# (The original call was missing the comma between `ngen=50` and `verbose=True`.)
a = eaSimple(pop, toolbox, cxpb=0.3, mutpb=0.1, ngen=50, verbose=True)
# Selection operator: tournament selection with tournaments of size 2.
toolbox.register("select", tools.selTournament, tournsize=2)
def mutate(indiv, indpb):
    """Mutate an individual in place and return it wrapped in a 1-tuple.

    Each hyperparameter in ``indiv.params`` is, with probability ``indpb``,
    replaced by the same-named entry of a fresh
    ``indiv.generate_hyperparams()`` draw. Afterwards every entry of the
    ``indiv.features`` mask is independently negated with probability
    ``indpb`` and the mask is stored back as a NumPy array.

    Args:
        indiv: Object exposing ``params`` (a dict), ``features`` (an iterable
            of booleans) and ``generate_hyperparams()``.
        indpb: Per-parameter / per-feature mutation probability.

    Returns:
        The one-element tuple ``(indiv,)``.
    """
    # Only the keys are needed: values are overwritten, never read. Assigning
    # to existing keys does not resize the dict, so iterating it is safe.
    for param_name in indiv.params:
        if random.random() < indpb:
            indiv.params[param_name] = indiv.generate_hyperparams()[param_name]
    # Keep a feature when the draw exceeds indpb, otherwise flip it.
    indiv.features = np.array(
        [feat if random.random() > indpb else not feat for feat in indiv.features]
    )
    return (indiv,)
# Register `mutate` as the toolbox's "mutate" operator with indpb fixed at 0.05.
# NOTE(review): `toolbox` is only created a few lines below and `evaluate` is
# defined after this run of statements — these lines look like separately
# pasted snippets; confirm the intended execution order.
toolbox.register("mutate", mutate, indpb=0.05)
from deap import base, creator, tools
# Fitness type with a single objective of weight +1.0 (maximised under DEAP's
# weights convention).
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# Individual type derived from Model, carrying a FitnessMax `fitness` attribute.
creator.create("Individual", Model, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# "population" repeats creator.Individual into a list; the count is supplied
# at call time.
toolbox.register("population", tools.initRepeat, list, creator.Individual)
# Fitness function used by the algorithm.
toolbox.register("evaluate", evaluate)
# Initial population; 100 is passed through to initRepeat as the repeat count.
pop = toolbox.population(100)
# Cross-validate an individual's model on its selected feature columns.
# Splits come from RepeatedStratifiedKFold (10 splits, 3 repeats, seeded with
# SEED) over x_train / y_train, which are defined outside this function; the
# columns of x_train are selected with indiv.features.
# NOTE(review): the visible snippet stops after predict(); `perfo` is never
# filled and nothing is returned here, so the scoring/return part is
# presumably cut off — confirm against the full source.
def evaluate(indiv):
rkf = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=SEED)
data = x_train[:, indiv.features]
perfo = []
for train_index, test_index in rkf.split(X=data, y=y_train):
x_train_split, x_test_split = data[train_index], data[test_index]
y_train_split, y_test_split = y_train[train_index], y_train[test_index]
# The same indiv.model object is re-fitted on every split.
model = indiv.model
model.fit(x_train_split, y_train_split)
y_pred = model.predict(x_test_split)
# repair rf if broken
# When max_features exceeds the number of selected features (the sum of the
# boolean mask), redraw it with random.randint(1, max(2, n_selected)).
# NOTE(review): whether the upper bound is inclusive depends on which `random`
# is in scope (stdlib vs numpy.random) — confirm. With zero selected features
# the redrawn value is still larger than the feature count.
if self.params['max_features'] > np.sum(self.features):
self.params['max_features'] = random.randint(1, max(2, np.sum(self.features)))
# Candidate solution: a model family, a boolean feature mask and a
# hyperparameter dict.
class Model:
# Second dimension of `x` (defined outside this class), read once when the
# class body runs.
nb_features = x.shape[1]
def __init__(self):
# Draw the model family with explicit weights; 'nb' is given weight 0.
# NOTE(review): `p=` is a numpy.random.choice keyword (stdlib random.choice
# does not accept it), so `random` is presumably numpy.random — confirm.
self.model_family = random.choice(['nb', 'rf', 'lgbm', 'catboost'], p=[0, 0.3334, 0.3333, 0.3333])
# Presumably one True/False draw per feature (numpy-style `size` argument).
self._features = random.choice([True, False], self.nb_features)
# generate_hyperparams() is not shown in this snippet.
self.params = self.generate_hyperparams()
# NOTE(review): the body of this property is not visible in the snippet.
@property
def features(self):
# Standalone fragment: draws a model family ('nb' has weight 0) and a boolean
# feature selection sized by `nb_features`.
# NOTE(review): relies on a numpy-style `random.choice` (the `p=` keyword and a
# size argument) and on `nb_features` being defined elsewhere — confirm.
model_family = random.choice(['nb', 'rf', 'lgbm', 'catboost'], p=[0, 0.3334, 0.3333, 0.3333])
features = random.choice([True, False], nb_features)
def cxModel(ind1, ind2, swap_indpb=0.5):
new_feat_ind1 = []
new_feat_ind2 = []
for feature1, feature2 in zip(ind1.features, ind2.features):
if random.random() < swap_indpb:
new_feat_ind1.append(feature2)
new_feat_ind2.append(feature1)
else:
new_feat_ind1.append(feature1)
new_feat_ind2.append(feature2)