This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train(): | |
# Read Data | |
data = pd.read_csv(DATA_INGESTION['data_path']) | |
target = DATA_INGESTION['data_map']['target'] | |
variables = DATA_INGESTION['data_map']['variables'] | |
#Preprocessing | |
flt = data['umbrella_limit']>=0 | |
data = data[flt] | |
data[target] = data[target].map(FEATURES_ENGINEERING['target_encoding']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Compile pipeline contains the pipeline object | |
''' | |
import data_preprocessing as Data_Prep | |
import feature_engineering as Feat_Eng | |
from imblearn.pipeline import Pipeline | |
from imblearn.over_sampling import SMOTE | |
from sklearn.ensemble import RandomForestClassifier | |
#Utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(BaseEstimator, TransformerMixin): | |
""" A transformer that returns DataFrame | |
with variable encoded. | |
Parameters | |
---------- | |
encoding_meta : list, default=None | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encoder(data, var, mapping):
    """Encode one column of ``data`` through a value mapping.

    :param data: pandas DataFrame holding the column to encode.
    :param var: name of the column to encode.
    :param mapping: correspondence passed straight to ``Series.map``
        (typically a dict of raw value -> encoded value).
    :return: the Series produced by ``data[var].map(mapping)``;
        ``data`` itself is not modified.
    :raises KeyError: if ``var`` is not a column of ``data``.
    """
    # The previous guard ended in a bare ``pass``, so a missing column fell
    # through to an opaque KeyError from ``data[var]``. Fail explicitly
    # instead, keeping the same exception type for existing callers.
    if var not in data.columns:
        raise KeyError(
            "column {!r} not found in data; available columns: {}".format(
                var, list(data.columns)
            )
        )
    return data[var].map(mapping)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
class Transformer(BaseEstimator, TransformerMixin):
    """Pass-through transformer: nothing is fitted and input is returned as-is."""

    def fit(self, X, y=None):
        """Accept ``X`` and ``y`` without using them and return this instance."""
        return self

    def transform(self, X):
        """Return ``X`` itself, unchanged (the same object, not a copy)."""
        return X
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Refresh the local apt package index.
sudo apt-get update -y
# Install pip, the Python development headers and the venv module.
sudo apt-get install -y \
    python3-pip \
    python3-dev \
    python3-venv
# Create a Python virtual environment in ./pyenv.
python3 -m venv pyenv
# Activate the virtual environment in the current shell.
source ./pyenv/bin/activate
# Install the packages listed in the requirements file.
pip install -r ./src/score_interactive_endpoint/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Optional positional parameters; each falls back to a default when omitted:
#   $1  CLUSTER_NAME  (default: cluster-00000)
#   $2  REGION        (default: europe-west6)
#   $3  BUCKET        (default: cloud-demo-databrick-gcp)
# The shebang must be an absolute path: "bin/bash" would be resolved
# relative to the current working directory when the script is executed.
CLUSTER_NAME=${1:-cluster-00000}
REGION=${2:-europe-west6}
BUCKET=${3:-cloud-demo-databrick-gcp}
#Run job | |
gcloud dataproc jobs submit pyspark \ | |
--cluster ${CLUSTER_NAME} \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# setup_cluster.sh | |
# Create a plain vanilla cluster with config if it doesn't exist.
# REGION - Region name (default eu) | |
# BUCKET - Bucket name (default cloud-demo-databrick-gcp)
#Pass all parameters (or use default parameters) | |
CLUSTER_NAME=${1:-cluster-00000} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# setup.sh | |
# Create a bucket if it doesn't exist.
# And load deployment scripts(.sh, .py) | |
# REGION - Region name (default eu) | |
# BUCKET - Bucket name (default cloud-demo-databrick-gcp)
#Pass REGION and BUCKET names (or use default parameters) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%sh | |
rm -rf /tmp/mleap_python_model_export | |
mkdir /tmp/mleap_python_model_export | |
ls -la /tmp/mleap_python_model_export | |
#Serialize Model to Bundle | |
lrModel.serializeToBundle("jar:file:/tmp/mleap_python_model_export/lrModel.zip", predictions) | |
%sh | |
ls -la /tmp/mleap_python_model_export/ | |
dbutils.fs.cp("file:/tmp/mleap_python_model_export/lrModel.zip", "dbfs:/example/lrModel.zip") | |
display(dbutils.fs.ls("dbfs:/example")) |
NewerOlder