Skip to content

Instantly share code, notes, and snippets.

View cereniyim's full-sized avatar

Ceren cereniyim

View GitHub Profile
@cereniyim
cereniyim / fit_and_save_model.py
Last active October 19, 2023 12:36
Estimator wrapper class
@dataclass
class ModelMetaData:
    """Pair an estimator with a model path."""

    # The IsolationForest estimator instance.
    estimator: IsolationForest
    # Path string for the model; presumably where it is persisted -- TODO confirm.
    model_path: str
class AnomalyDetector:
def __init__(self, estimator=None):
self._estimator = estimator or IsolationForest(
random_state=42,
#!/usr/bin/env bash
# Build Docker images; only the base image build is visible in this snippet.

# The first argument (the version) is mandatory: print usage and abort without it.
if [ -z "$1" ]; then
    echo "Usage ./build-task-images.sh VERSION"
    echo "No version was passed! Please pass a version to the script e.g. 0.1"
    exit 1
fi

# Remember the requested version (not referenced by the lines shown here).
VERSION=$1

# Build the base image from the base_docker directory.
docker build -t code-challenge/base-docker base_docker
import pandas as pd
import numpy as np
import click
from pathlib import Path
from sklearn.ensemble import RandomForestRegressor
import pickle
def convert_features_to_array(features):
num_rows = len(features)
num_cols = len(features.columns)
@cereniyim
cereniyim / docker-compose.yml
Created June 6, 2020 13:49
docker compose for wine rating predictor
version: '2'
services:
luigid:
image: code-challenge/orchestrator:latest
build:
context: ./orchestrator
command: luigid
ports:
- "8082:8082"
@cereniyim
cereniyim / task.py
Last active June 6, 2020 13:56
last two steps of the task.py
import luigi
import os
from pathlib import Path
from util import DockerTask
class TrainModel(DockerTask):
"""Task to train random forest regressor with training datasets"""
in_path = '/usr/share/data/interim/'
@cereniyim
cereniyim / make_dataset_task.py
Created June 6, 2020 10:00
make dataset task
class MakeDatasets(DockerTask):
    """Docker task that separates the dataset into training and test sets."""

    # Input CSV; defaults to the raw wine dataset.
    in_csv = luigi.Parameter(default='/usr/share/data/raw/wine_dataset.csv')
    # Output directory; defaults to the interim data directory.
    out_dir = luigi.Parameter(default='/usr/share/data/interim/')
    # Presumably the name of the success-marker file -- TODO confirm in DockerTask.
    flag = luigi.Parameter(default='.SUCCESS_MakeDatasets')

    @property
    def image(self):
        """Return the make-dataset image reference tagged with ``VERSION``."""
        return f'code-challenge/make-dataset:{VERSION}'
@cereniyim
cereniyim / plot_feature_importances.py
Created May 30, 2020 10:54
Plot feature importances of an ML model
def PlotFeatureImportances(model, feature_names):
feature_importances = (pd
.DataFrame(
{'feature': feature_names,
'importance': model
.feature_importances_}))
feature_importances = (feature_importances
.sort_values(by="importance",
ascending=False))
@cereniyim
cereniyim / plot_actuals_and_predictions.py
Created May 30, 2020 10:53
Plot the distributions of two different arrays
def PlotPredictedVSActual(predictions, actuals):
figsize(20, 10)
plt.rcParams['font.size'] = 14
# histogram of predictions
ax = plt.subplot(121)
ax.hist(predictions,
bins=10,
color = "#971539",
edgecolor = 'white')
@cereniyim
cereniyim / wine_rating_predictor_imports3.py
Created May 30, 2020 10:49
Imports of the 3rd Notebook
# visualization and manipulation libraries
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.core.pylabtools import figsize
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 60)
%matplotlib inline
# to display visuals in the notebook
@cereniyim
cereniyim / fit_evaluate_model.py
Created May 21, 2020 17:37
Function to train a model and evaluate it with mean squared error
def fit_evaluate_model(model, X_train, y_train,
                       X_valid, y_valid):
    """Train ``model`` and return its mean squared error on validation data.

    The model is fitted with ``model.fit(X_train, y_train)``, asked to
    predict ``X_valid``, and the result of
    ``mean_squared_error(y_valid, predictions)`` is returned.
    """
    model.fit(X_train, y_train)
    validation_predictions = model.predict(X_valid)
    validation_error = mean_squared_error(y_valid, validation_predictions)
    return validation_error