This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train(): | |
# Read Data | |
data = pd.read_csv(DATA_INGESTION['data_path']) | |
target = DATA_INGESTION['data_map']['target'] | |
variables = DATA_INGESTION['data_map']['variables'] | |
#Preprocessing | |
flt = data['umbrella_limit']>=0 | |
data = data[flt] | |
data[target] = data[target].map(FEATURES_ENGINEERING['target_encoding']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Compile pipeline contains the pipeline object | |
''' | |
import data_preprocessing as Data_Prep | |
import feature_engineering as Feat_Eng | |
from imblearn.pipeline import Pipeline | |
from imblearn.over_sampling import SMOTE | |
from sklearn.ensemble import RandomForestClassifier | |
#Utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Encoder(BaseEstimator, TransformerMixin): | |
""" A transformer that returns DataFrame | |
with variable encoded. | |
Parameters | |
---------- | |
encoding_meta : list, default=None | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encoder(data, var, mapping):
    '''
    Encode a single column of a DataFrame through a value mapping.
    :params: data - pandas DataFrame holding the column to encode
             var - label of the column to encode
             mapping - correspondence handed to ``Series.map``
                       (for a dict, values without an entry become NaN)
    :return: the mapped column, i.e. ``data[var].map(mapping)``
    :raises: KeyError if ``var`` is not a column of ``data``
    '''
    # Report a missing column explicitly instead of silently falling
    # through to an opaque lookup failure on the next line.
    if var not in data.columns:
        raise KeyError(
            "Column {!r} not found in data; cannot encode it.".format(var)
        )
    return data[var].map(mapping)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, TransformerMixin | |
class Transformer(BaseEstimator, TransformerMixin):
    '''
    Pass-through transformer: fitting does nothing and transforming
    hands the input back unchanged.
    '''

    def fit(self, X, y=None):
        '''Nothing is learned from X or y; return this instance.'''
        return self

    def transform(self, X):
        '''Return X exactly as received.'''
        return X
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#Pass CLUSTER_NAME, REGION AND BUCKET parameters (or use default parameters) | |
CLUSTER_NAME=${1:-cluster-00000} | |
REGION=${2:-europe-west6} | |
BUCKET=${3:-cloud-demo-databrick-gcp} | |
#Run job | |
gcloud dataproc jobs submit pyspark \ | |
--cluster ${CLUSTER_NAME} \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# setup_cluster.sh | |
# Create a plain vanilla cluster with config if it doesn't exist.
# REGION - Region name (default eu) | |
# BUCKET - Bucket name (default cloud-demo-databrick-gcp)
#Pass all parameters (or use default parameters) | |
CLUSTER_NAME=${1:-cluster-00000} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# setup.sh | |
# Create a bucket if it doesn't exist.
# And load deployment scripts(.sh, .py) | |
# REGION - Region name (default eu) | |
# BUCKET - Bucket name (default cloud-demo-databrick-gcp)
#Pass REGION and BUCKET names (or use default parameters) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%sh | |
rm -rf /tmp/mleap_python_model_export | |
mkdir /tmp/mleap_python_model_export | |
ls -la /tmp/mleap_python_model_export | |
#Serialize Model to Bundle | |
lrModel.serializeToBundle("jar:file:/tmp/mleap_python_model_export/lrModel.zip", predictions) | |
%sh | |
ls -la /tmp/mleap_python_model_export/ | |
dbutils.fs.cp("file:/tmp/mleap_python_model_export/lrModel.zip", "dbfs:/example/lrModel.zip") | |
display(dbutils.fs.ls("dbfs:/example")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define tracking function | |
def log_lineareg(experimentID, run_name, params, abt_train, abt_test, debug=False): | |
""" | |
Function to start a run within an existing experiment
:param experimentID: unique ID associated to original experiment | |
:param run_name: label for the name of the run | |
:param params: parameters used for the run, such as arguments
:param abt_train: analytical base table for training | |
:param abt_test: analytical base table for testing |
NewerOlder