This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the 'adult.test' CSV and serialise every row to its own JSON string,
# stored alongside the data in a new 'json' column.
df_test = pd.read_csv(PATH/'adult.test')
df_test['json'] = df_test.apply(lambda x: x.to_json(), axis=1)
# Bracket access is used so the lookup can never be shadowed by a DataFrame
# attribute of the same name; the result is a plain list of JSON strings.
messages = df_test['json'].tolist()
def start_producing(): | |
producer = KafkaProducer(bootstrap_servers=KAFKA_HOST) | |
for i in range(200): | |
message_id = str(uuid.uuid4()) | |
message = {'request_id': message_id, 'data': json.loads(messages[i])} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def start(model_id, messages_count, batch_id): | |
for msg in consumer: | |
message = json.loads(msg.value) | |
if is_retraining_message(msg): | |
model_fname = 'model_{}_.p'.format(model_id) | |
model = reload_model(MODELS_PATH/model_fname) | |
print("NEW MODEL RELOADED {}".format(model_id)) | |
elif is_application_message(msg): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def start(): | |
consumer = KafkaConsumer(RETRAIN_TOPIC, bootstrap_servers=KAFKA_HOST) | |
for msg in consumer: | |
message = json.loads(msg.value) | |
if 'retrain' in message and message['retrain']: | |
model_id = message['model_id'] | |
batch_id = message['batch_id'] | |
message_fname = 'messages_{}_.txt'.format(batch_id) | |
messages = MESSAGES_PATH/message_fname |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def objective(params): | |
""" | |
objective function for lightgbm. | |
""" | |
# hyperopt casts as float | |
params['num_boost_round'] = int(params['num_boost_round']) | |
params['num_leaves'] = int(params['num_leaves']) | |
# need to be passed as parameter | |
params['is_unbalance'] = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# hyperopt search space for the LightGBM hyper-parameters.
# `hp.uniform(label, low, high)` samples a float uniformly in [low, high];
# `hp.quniform(label, low, high, q)` samples values rounded to multiples of q
# and still returns them as floats, so integer-valued parameters
# (num_boost_round, num_leaves) must be cast to int by whoever consumes them.
params = {
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'num_boost_round': hp.quniform('num_boost_round', 50, 500, 10),
    'num_leaves': hp.quniform('num_leaves', 31, 256, 4),
    'min_child_weight': hp.uniform('min_child_weight', 0.1, 10),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.),
    'subsample': hp.uniform('subsample', 0.5, 1.),
    'reg_alpha': hp.uniform('reg_alpha', 0.01, 0.1),
    'reg_lambda': hp.uniform('reg_lambda', 0.01, 0.1),
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initialise the `i` attribute stored on the objective function before the
# search starts (presumably an evaluation counter read inside `objective`
# -- TODO confirm against its body).
objective.i = 0
# `trials` collects the result of every evaluation performed by `fmin`.
trials = Trials()
# Minimise `objective` over the `params` space with the TPE algorithm,
# stopping after `maxevals` evaluations; `best` holds the best point found.
best = fmin(
    fn=objective,
    space=params,
    algo=tpe.suggest,
    max_evals=maxevals,
    trials=trials,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if not Path('mlruns').exists(): | |
client = MlflowClient() | |
n_experiments=0 | |
elif not reuse_experiment: | |
client = MlflowClient() | |
n_experiments = len(client.list_experiments()) | |
experiment_name = 'experiment_' + str(n_experiments) | |
client.create_experiment(name=experiment_name) | |
with mlflow.start_run(experiment_id=n_experiments): | |
model = lgb.LGBMClassifier(**best) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
env = Environment( | |
train_dataset=data, | |
results_path='HyperparameterHunterAssets', | |
metrics=[metrics], | |
do_predict_proba = do_predict_proba, | |
cv_type=cv_type, | |
cv_params=dict(n_splits=n_splits), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bayesian hyper-parameter search limited to `maxevals` iterations.
optimizer = opt.BayesianOptimization(iterations=maxevals)
# Describe the experiments to run: the model class to instantiate, the
# parameters handed to its initializer, and the extra parameters
# (`extra_params`) forwarded separately from the initializer arguments.
optimizer.set_experiment_guidelines(
    model_initializer=lgb.LGBMClassifier,
    model_init_params=params,
    model_extra_params=extra_params,
)
# Run the optimisation loop.
optimizer.go()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
params = dict( | |
is_unbalance = True, | |
learning_rate = Real(0.01, 0.3), | |
num_boost_round=Integer(50, 500), | |
num_leaves=Integer(31, 255), | |
min_child_weight = Real(0.1, 10), | |
colsample_bytree= Real(0.5, 1.), | |
subsample=Real(0.5, 1.), | |
reg_alpha= Real(0.01, 0.1), | |
reg_lambda= Real(0.01, 0.1) |
OlderNewer