This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@dataclass
class ModelMetaData:
    """Record that bundles an ``IsolationForest`` estimator with a path string.

    The dataclass only declares the two fields below; it adds no behavior
    beyond what ``@dataclass`` generates (``__init__``, ``__repr__``,
    ``__eq__``).
    """

    # Estimator object carried by this record.
    estimator: IsolationForest
    # Path associated with the model.
    # NOTE(review): presumably where the estimator is persisted - confirm
    # against the code that builds this record.
    model_path: str
class AnomalyDetector: | |
def __init__(self, estimator=None): | |
self._estimator = estimator or IsolationForest( | |
random_state=42, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
if test -z "$1" | |
then | |
echo "Usage ./build-task-images.sh VERSION" | |
echo "No version was passed! Please pass a version to the script e.g. 0.1" | |
exit 1 | |
fi | |
VERSION=$1 | |
docker build -t code-challenge/base-docker base_docker |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import click | |
from pathlib import Path | |
from sklearn.ensemble import RandomForestRegressor | |
import pickle | |
def convert_features_to_array(features): | |
num_rows = len(features) | |
num_cols = len(features.columns) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '2' | |
services: | |
luigid: | |
image: code-challenge/orchestrator:latest | |
build: | |
context: ./orchestrator | |
command: luigid | |
ports: | |
- "8082:8082" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import luigi | |
import os | |
from pathlib import Path | |
from util import DockerTask | |
class TrainModel(DockerTask): | |
"""Task to train random forest regressor with training datasets""" | |
in_path = '/usr/share/data/interim/' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MakeDatasets(DockerTask): | |
"""Task to separate training and test sets""" | |
in_csv = luigi.Parameter(default='/usr/share/data/raw/wine_dataset.csv') | |
out_dir = luigi.Parameter(default='/usr/share/data/interim/') | |
flag = luigi.Parameter('.SUCCESS_MakeDatasets') | |
@property | |
def image(self): | |
return f'code-challenge/make-dataset:{VERSION}' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def PlotFeatureImportances(model, feature_names): | |
feature_importances = (pd | |
.DataFrame( | |
{'feature': feature_names, | |
'importance': model | |
.feature_importances_})) | |
feature_importances = (feature_importances | |
.sort_values(by="importance", | |
ascending=False)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def PlotPredictedVSActual(predictions, actuals): | |
figsize(20, 10) | |
plt.rcParams['font.size'] = 14 | |
# histogram of predictions | |
ax = plt.subplot(121) | |
ax.hist(predictions, | |
bins=10, | |
color = "#971539", | |
edgecolor = 'white') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# visualization and manipulation libraries | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from IPython.core.pylabtools import figsize | |
import pandas as pd | |
import numpy as np | |
pd.set_option('display.max_columns', 60) | |
%matplotlib inline | |
# to display visuals in the notebook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fit_evaluate_model(model, X_train, y_train,
                       X_valid, y_valid):
    """Train ``model`` and score it on the validation data.

    The model is fitted in place on ``(X_train, y_train)``, then used to
    predict ``X_valid``.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.
        X_valid: Validation features passed to ``model.predict``.
        y_valid: Validation targets compared against the predictions.

    Returns:
        The result of ``mean_squared_error(y_valid, predictions)``.
        NOTE(review): ``mean_squared_error`` is presumably
        ``sklearn.metrics.mean_squared_error`` - confirm the import.
    """
    model.fit(X_train, y_train)
    validation_predictions = model.predict(X_valid)
    validation_error = mean_squared_error(y_valid, validation_predictions)
    return validation_error
NewerOlder