import pandas as pd

# Serialise each row of the test set to JSON; these strings become the Kafka messages
df_test = pd.read_csv(PATH/'adult.test')
df_test['json'] = df_test.apply(lambda x: x.to_json(), axis=1)
messages = df_test.json.tolist()
import json
import uuid
from kafka import KafkaProducer

def start_producing():
    producer = KafkaProducer(bootstrap_servers=KAFKA_HOST)
    for i in range(200):
        message_id = str(uuid.uuid4())
        message = {'request_id': message_id, 'data': json.loads(messages[i])}
        producer.send('app_messages', json.dumps(message).encode('utf-8'))  # topic name is illustrative
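For reference, each produced message is a JSON object of the form sketched below (field values are illustrative; the columns come from the adult dataset):

# {'request_id': '533b...', 'data': {'age': 25, 'workclass': 'Private', ...}}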
def start(model_id, messages_count, batch_id):
    # APP_TOPIC is assumed to be defined alongside RETRAIN_TOPIC
    consumer = KafkaConsumer(APP_TOPIC, RETRAIN_TOPIC, bootstrap_servers=KAFKA_HOST)
    for msg in consumer:
        message = json.loads(msg.value)
        if is_retraining_message(msg):
            # a new model has been trained: reload it from disk
            model_fname = 'model_{}_.p'.format(model_id)
            model = reload_model(MODELS_PATH/model_fname)
            print("NEW MODEL RELOADED {}".format(model_id))
        elif is_application_message(msg):
            # score the incoming observation with the current model
            ...
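The two predicates used above are not shown in the snippet. A minimal sketch, assuming retraining notifications arrive on RETRAIN_TOPIC with a 'training_completed' flag (an assumed field name) and application messages carry the 'request_id'/'data' keys set by the producer:

def is_retraining_message(msg):
    message = json.loads(msg.value)
    return msg.topic == RETRAIN_TOPIC and message.get('training_completed', False)

def is_application_message(msg):
    message = json.loads(msg.value)
    return msg.topic == APP_TOPIC and 'request_id' in message and 'data' in message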
def start():
    consumer = KafkaConsumer(RETRAIN_TOPIC, bootstrap_servers=KAFKA_HOST)
    for msg in consumer:
        message = json.loads(msg.value)
        if 'retrain' in message and message['retrain']:
            model_id = message['model_id']
            batch_id = message['batch_id']
            message_fname = 'messages_{}_.txt'.format(batch_id)
            messages = MESSAGES_PATH/message_fname
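For reference, a retrain trigger consistent with the keys this consumer reads might look like this (values are illustrative):

retrain_message = {'retrain': True, 'model_id': 0, 'batch_id': 0}
producer = KafkaProducer(bootstrap_servers=KAFKA_HOST)
producer.send(RETRAIN_TOPIC, json.dumps(retrain_message).encode('utf-8'))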
def objective(params):
    """
    Objective function for lightgbm.
    """
    # hyperopt casts these as floats, but lightgbm expects ints
    params['num_boost_round'] = int(params['num_boost_round'])
    params['num_leaves'] = int(params['num_leaves'])
    # needs to be passed as a fixed parameter rather than searched over
    params['is_unbalance'] = True
    # minimal cross-validation body (a sketch: `dtrain` is assumed to be a
    # lightgbm.Dataset built from the training data)
    cv_result = lgb.cv(params, dtrain, num_boost_round=params['num_boost_round'],
                       nfold=3, stratified=True, metrics='binary_logloss')
    score = cv_result['binary_logloss-mean'][-1]
    objective.i += 1
    return score
from hyperopt import hp, tpe, fmin, Trials

params = {
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.2),
    'num_boost_round': hp.quniform('num_boost_round', 50, 500, 10),
    'num_leaves': hp.quniform('num_leaves', 31, 256, 4),
    'min_child_weight': hp.uniform('min_child_weight', 0.1, 10),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.),
    'subsample': hp.uniform('subsample', 0.5, 1.),
    'reg_alpha': hp.uniform('reg_alpha', 0.01, 0.1),
    'reg_lambda': hp.uniform('reg_lambda', 0.01, 0.1),
}
objective.i = 0
trials = Trials()
best = fmin(fn=objective,
            space=params,
            algo=tpe.suggest,
            max_evals=maxevals,
            trials=trials)
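fmin returns the quniform values as floats, and the fixed is_unbalance flag set inside objective is not part of best; both need restoring before the final fit below:

# mirror the casts done inside objective, and re-add the fixed parameter
best['num_boost_round'] = int(best['num_boost_round'])
best['num_leaves'] = int(best['num_leaves'])
best['is_unbalance'] = True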
from pathlib import Path

import lightgbm as lgb
import mlflow
from mlflow.tracking import MlflowClient

if not Path('mlruns').exists():
    client = MlflowClient()
    n_experiments = 0
elif not reuse_experiment:
    client = MlflowClient()
    n_experiments = len(client.list_experiments())
    experiment_name = 'experiment_' + str(n_experiments)
    client.create_experiment(name=experiment_name)
else:
    # reuse the most recent experiment (assumed semantics of `reuse_experiment`)
    n_experiments = len(MlflowClient().list_experiments()) - 1
with mlflow.start_run(experiment_id=n_experiments):
    model = lgb.LGBMClassifier(**best)
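    # hedged sketch of what typically follows inside the run: fit with the tuned
    # parameters and log them (X_train and y_train are assumed to exist)
    model.fit(X_train, y_train)
    for name, value in best.items():
        mlflow.log_param(name, value)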
from hyperparameter_hunter import Environment
# assuming HyperparameterHunter's optimization module for the `opt` alias
from hyperparameter_hunter import optimization as opt

# the optimizer picks up the active Environment automatically
env = Environment(
    train_dataset=data,
    results_path='HyperparameterHunterAssets',
    metrics=[metrics],
    do_predict_proba=do_predict_proba,
    cv_type=cv_type,
    cv_params=dict(n_splits=n_splits),
)

optimizer = opt.BayesianOptimization(iterations=maxevals)
optimizer.set_experiment_guidelines(
    model_initializer=lgb.LGBMClassifier,
    model_init_params=params,
    model_extra_params=extra_params,
)
optimizer.go()
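Once the search finishes, the winning experiment can be inspected; a short follow-up, assuming HyperparameterHunter's best_experiment/best_score attributes (check your installed version):

print(optimizer.best_experiment)  # id of the best-scoring experiment
print(optimizer.best_score)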
from hyperparameter_hunter import Real, Integer

params = dict(
    is_unbalance=True,
    learning_rate=Real(0.01, 0.3),
    num_boost_round=Integer(50, 500),
    num_leaves=Integer(31, 255),
    min_child_weight=Real(0.1, 10),
    colsample_bytree=Real(0.5, 1.),
    subsample=Real(0.5, 1.),
    reg_alpha=Real(0.01, 0.1),
    reg_lambda=Real(0.01, 0.1),
)
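In this dictionary only the Real/Integer entries are treated as dimensions to search; plain values such as is_unbalance=True stay fixed. The extra_params passed to set_experiment_guidelines above would carry fit-time arguments, along these (assumed) lines:

# hypothetical fit-time arguments; names follow lightgbm's sklearn API
extra_params = dict(fit=dict(eval_metric='binary_logloss', verbose=False))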