Mike Kraus MBKraus

## dataset.py
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Generate dataset with 1000 samples, 100 features and 2 classes

def gen_dataset(n_samples=1000, n_features=100, n_classes=2, random_state=123):
    X, y = datasets.make_classification(
        n_features=n_features,
        n_samples=n_samples,
        n_informative=int(0.6 * n_features),    # the number of informative features

## Parameters_random_grid.py
import lightgbm as lgb
import numpy as np
from sklearn import pipeline
from hyperopt import hp

# One-step scikit-learn pipeline: its only stage, named 'clf', is a LightGBM
# classifier built with default constructor arguments.
pipe = pipeline.Pipeline(steps=[('clf', lgb.LGBMClassifier())])

param_gridsearch = {

## grid_random.py
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
import pandas as pd
import time

def search(pipeline, parameters, X_train, y_train, X_test, y_test, optimizer='grid_search', n_iter=None):

    start = time.time()

    if optimizer == 'grid_search':
        grid_obj = GridSearchCV(estimator=pipeline,

## hyperopt.py
import lightgbm as lgb
from sklearn.model_selection import cross_val_score
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from time import time

def hyperopt(param_space, X_train, y_train, X_test, y_test, num_eval):

    start = time.time()

    def objective_function(params):

## hyperopt_param.py
# Names this snippet depends on. `scope` is not imported anywhere else in the
# file, so without these lines building the dict raises NameError.
import numpy as np
from hyperopt import hp
from hyperopt.pyll import scope

# Search space passed to hyperopt. Each key is an LGBMClassifier constructor
# argument (the dict is unpacked with ** in objective_function); each value is
# a hyperopt expression whose first argument is the label under which hyperopt
# tracks that dimension.
param_hyperopt = {
    # Log-uniform between 0.01 and 1.
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
    # quniform with q=1 draws whole-number floats; scope.int casts them to int.
    'max_depth': scope.int(hp.quniform('max_depth', 5, 15, 1)),
    'n_estimators': scope.int(hp.quniform('n_estimators', 5, 35, 1)),
    'num_leaves': scope.int(hp.quniform('num_leaves', 5, 50, 1)),
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart']),
    # NOTE(review): the label 'colsample_by_tree' differs from the key
    # 'colsample_bytree'. It is left untouched because code outside this
    # snippet may look the tuned value up by label - confirm before renaming.
    'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
}

## objective_function.py
from hyperopt import STATUS_OK
import lightgbm as lgb

def objective_function(params):
    """Evaluate one hyperparameter candidate for hyperopt.

    Builds an ``LGBMClassifier`` from ``params``, runs 5-fold
    ``cross_val_score`` on the ``X_train`` / ``y_train`` names taken from the
    enclosing scope, and averages the fold scores.

    Returns a dict with the negated mean score under ``'loss'`` and
    ``STATUS_OK`` under ``'status'``; the score is negated so that a higher
    score becomes a lower loss.
    """
    model = lgb.LGBMClassifier(**params)
    fold_scores = cross_val_score(model, X_train, y_train, cv=5)
    mean_score = fold_scores.mean()
    return {'loss': -mean_score, 'status': STATUS_OK}

## Optimiser.py
from hyperopt import fmin, tpe, Trials
import numpy as np

# `trials` records every evaluation made during the search; fmin runs
# `num_eval` evaluations of objective_function over param_space using the TPE
# suggestion algorithm, seeded with a fixed RandomState.
trials = Trials()
best_param = fmin(
    fn=objective_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=num_eval,
    trials=trials,
    rstate=np.random.RandomState(1),
)

## run.py
# Evaluation budget: passed as n_iter to the random search and as num_eval to
# hyperopt. The grid search call does not receive it.
num_eval = 75

results_grid, estimator_grid = search(
    pipe, param_gridsearch, X_train, y_train, X_test, y_test,
    optimizer='grid_search',
)
results_random, estimator_random = search(
    pipe, param_random, X_train, y_train, X_test, y_test,
    optimizer='random_search', n_iter=num_eval,
)
results_hyperopt = hyperopt(
    param_hyperopt, X_train, y_train, X_test, y_test, num_eval,
)

## initial_model_DAG.py
import airflow
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator
from src.models.initial_model_functions import load_preprocess, fit_model

# Absolute filesystem paths used by this DAG module.
# NOTE(review): the names suggest a sample of streamed data, a held-out test
# set and the saved initial model, with ".p" presumably meaning pickle -
# confirm against load_preprocess / fit_model, which are not visible here.
PATH_STREAM_SAMPLE = "/data/stream_sample.p"
PATH_TEST_SET = "/data/test_set.p"
INITIAL_MODEL_PATH = "/models/current_model/initial_model.H5"

# NOTE(review): presumably the training batch size given to fit_model - confirm.
BATCH_SIZE = 128

## initial_model_functions.py
import keras
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import pickle
import logging
import os
	from sklearn import datasets
	from sklearn.model_selection import train_test_split

	# Generate dataset with 1000 samples, 100 features and 2 classes

	def gen_dataset(n_samples=1000, n_features=100, n_classes=2, random_state=123):
	X, y = datasets.make_classification(
	n_features=n_features,
	n_samples=n_samples,
	n_informative=int(0.6 * n_features), # the number of informative features
	import lightgbm as lgb
	import numpy as np
	from sklearn import pipeline
	from hyperopt import hp

	# One-step scikit-learn pipeline: its only stage, named 'clf', is a LightGBM
	# classifier built with default constructor arguments.
	pipe = pipeline.Pipeline(steps=[('clf', lgb.LGBMClassifier())])

	param_gridsearch = {
	from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
	import pandas as pd
	import time

	def search(pipeline, parameters, X_train, y_train, X_test, y_test, optimizer='grid_search', n_iter=None):

	start = time.time()

	if optimizer == 'grid_search':
	grid_obj = GridSearchCV(estimator=pipeline,
	import lightgbm as lgb
	from sklearn.model_selection import cross_val_score
	from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
	from time import time

	def hyperopt(param_space, X_train, y_train, X_test, y_test, num_eval):

	start = time.time()

	def objective_function(params):
	# Names this snippet depends on. `scope` is not imported anywhere else in the
	# file, so without these lines building the dict raises NameError.
	import numpy as np
	from hyperopt import hp
	from hyperopt.pyll import scope

	# Search space passed to hyperopt. Each key is an LGBMClassifier constructor
	# argument (the dict is unpacked with ** in objective_function); each value
	# is a hyperopt expression whose first argument is the label under which
	# hyperopt tracks that dimension.
	param_hyperopt = {
	    # Log-uniform between 0.01 and 1.
	    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1)),
	    # quniform with q=1 draws whole-number floats; scope.int casts to int.
	    'max_depth': scope.int(hp.quniform('max_depth', 5, 15, 1)),
	    'n_estimators': scope.int(hp.quniform('n_estimators', 5, 35, 1)),
	    'num_leaves': scope.int(hp.quniform('num_leaves', 5, 50, 1)),
	    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart']),
	    # NOTE(review): the label 'colsample_by_tree' differs from the key
	    # 'colsample_bytree'. It is left untouched because code outside this
	    # snippet may look the tuned value up by label - confirm before renaming.
	    'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
	    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
	}
	from hyperopt import STATUS_OK
	import lightgbm as lgb

	def objective_function(params):
	    """Evaluate one hyperparameter candidate for hyperopt.

	    Builds an ``LGBMClassifier`` from ``params``, runs 5-fold
	    ``cross_val_score`` on the ``X_train`` / ``y_train`` names taken from
	    the enclosing scope, and averages the fold scores.

	    Returns a dict with the negated mean score under ``'loss'`` and
	    ``STATUS_OK`` under ``'status'``; the score is negated so that a higher
	    score becomes a lower loss.
	    """
	    # The body is nested under the def again; it had been flattened to the
	    # def's own indentation, which Python rejects.
	    clf = lgb.LGBMClassifier(**params)
	    score = cross_val_score(clf, X_train, y_train, cv=5).mean()
	    return {'loss': -score, 'status': STATUS_OK}
	from hyperopt import fmin, tpe, Trials
	import numpy as np

	# `trials` records every evaluation made during the search; fmin runs
	# `num_eval` evaluations of objective_function over param_space using the
	# TPE suggestion algorithm, seeded with a fixed RandomState.
	trials = Trials()
	best_param = fmin(
	    fn=objective_function,
	    space=param_space,
	    algo=tpe.suggest,
	    max_evals=num_eval,
	    trials=trials,
	    rstate=np.random.RandomState(1),
	)
	# Evaluation budget: passed as n_iter to the random search and as num_eval
	# to hyperopt. The grid search call does not receive it.
	num_eval = 75

	results_grid, estimator_grid = search(
	    pipe, param_gridsearch, X_train, y_train, X_test, y_test,
	    optimizer='grid_search',
	)
	results_random, estimator_random = search(
	    pipe, param_random, X_train, y_train, X_test, y_test,
	    optimizer='random_search', n_iter=num_eval,
	)
	results_hyperopt = hyperopt(
	    param_hyperopt, X_train, y_train, X_test, y_test, num_eval,
	)
	import airflow
	from airflow.models import DAG
	from airflow.operators.python_operator import PythonOperator
	from src.models.initial_model_functions import load_preprocess, fit_model

	# Absolute filesystem paths used by this DAG module.
	# NOTE(review): the names suggest a sample of streamed data, a held-out test
	# set and the saved initial model, with ".p" presumably meaning pickle -
	# confirm against load_preprocess / fit_model, which are not visible here.
	PATH_STREAM_SAMPLE = "/data/stream_sample.p"
	PATH_TEST_SET = "/data/test_set.p"
	INITIAL_MODEL_PATH = "/models/current_model/initial_model.H5"

	# NOTE(review): presumably the training batch size given to fit_model - confirm.
	BATCH_SIZE = 128
	import keras
	from keras.datasets import mnist
	from keras.models import Sequential, load_model
	from keras.layers import Dense, Dropout, Flatten
	from keras.layers import Conv2D, MaxPooling2D
	from keras import backend as K
	import pickle
	import logging
	import os