Dylan Valerio krsnewwave

## pipeline_registry.py
# in <root>/src/<project>/pipeline_registry.py

def register_pipelines() -> Dict[str, Pipeline]:
    data_engineering_pipeline = de.create_pipeline()
    xgb_pipe = ds.create_xgb_pipeline()
    rr_pipe = ds.create_rr_pipeline()
    logres_pipe = ds.create_logres_pipeline()
    rr_ho_pipe = ds.create_rr_ho_pipeline()

    return {

## pipeline.py
# in <root>/src/<project>/pipelines/data_science/pipeline.py

from kedro.pipeline import node, pipeline
from .nodes import split_data, fit_xgboost

def create_plot_roc_node():
    return node(
        func=plot_roc,
        inputs=["clf", "X_test", "y_test"],
        outputs="roc_graph",

## nodes_with_optuna.py
# in <root>/src/<project>/pipelines/data_science/nodes.py

def rr_objective(X_train: pd.DataFrame, y_train: pd.Series,
              X_test: pd.DataFrame, y_test: pd.Series,
              trial: optuna.trial):
    max_depth = trial.suggest_int("max_depth", 8, 64, log=True)
    min_samples_split = trial.suggest_int("min_samples_split", 50, 1000, )
    ccp_alpha = trial.suggest_float("ccp_alpha", 0.001, 0.03, log=True)
    rr_clf = RandomForestClassifier(max_depth=max_depth,
                                    min_samples_split=min_samples_split,

## catalog.yaml
# in <root>/conf/base/catalog.yaml

# Catalog entry `insurance`: a CSV dataset read from data/01_raw/train.csv
# and tagged with the `raw` layer.
insurance:
  type: pandas.CSVDataSet
  filepath: data/01_raw/train.csv
  layer: raw

# Catalog entry `model_input_table`: a Parquet dataset stored at
# data/03_primary/model_input_table.pq.
# NOTE(review): this preview may be cut off here — confirm whether further keys
# (e.g. `layer`) follow in the full catalog.
model_input_table:
  type: pandas.ParquetDataSet
  filepath: data/03_primary/model_input_table.pq

## create_kedro_project.sh
# (1) virtual environment
# NOTE(review): assumes a conda environment named kedro-env already exists —
# confirm, or create it before running this step.
conda activate kedro-env
# Install Kedro together with kedro-mlflow, Optuna and Kedro-Viz into the active environment.
pip install kedro kedro-mlflow optuna kedro-viz

# (2) new project with starter
# Enter any project name you like; I used 'tutorial'.
kedro new --starter=pandas-iris

# Change into the new project directory (named after the project name chosen above).
cd tutorial
# (3) fire up git. The starter already has a gitignore file and
	# in <root>/src/<project>/pipeline_registry.py

	def register_pipelines() -> Dict[str, Pipeline]:
	data_engineering_pipeline = de.create_pipeline()
	xgb_pipe = ds.create_xgb_pipeline()
	rr_pipe = ds.create_rr_pipeline()
	logres_pipe = ds.create_logres_pipeline()
	rr_ho_pipe = ds.create_rr_ho_pipeline()

	return {
	# in <root>/src/<project>/pipelines/data_science/pipeline.py

	from kedro.pipeline import node, pipeline
	from .nodes import split_data, fit_xgboost

	def create_plot_roc_node():
	return node(
	func=plot_roc,
	inputs=["clf", "X_test", "y_test"],
	outputs="roc_graph",
	# in <root>/src/<project>/pipelines/data_science/nodes.py

	def rr_objective(X_train: pd.DataFrame, y_train: pd.Series,
	X_test: pd.DataFrame, y_test: pd.Series,
	trial: optuna.trial):
	max_depth = trial.suggest_int("max_depth", 8, 64, log=True)
	min_samples_split = trial.suggest_int("min_samples_split", 50, 1000, )
	ccp_alpha = trial.suggest_float("ccp_alpha", 0.001, 0.03, log=True)
	rr_clf = RandomForestClassifier(max_depth=max_depth,
	min_samples_split=min_samples_split,
	# in <root>/conf/base/catalog.yaml

	insurance:
	type: pandas.CSVDataSet
	filepath: data/01_raw/train.csv
	layer: raw

	model_input_table:
	type: pandas.ParquetDataSet
	filepath: data/03_primary/model_input_table.pq
	# (1) virtual environment
	conda activate kedro-env
	pip install kedro kedro-mlflow optuna kedro-viz

	# (2) new project with starter
	# Enter any project name you like; I used 'tutorial'.
	kedro new --starter=pandas-iris

	cd tutorial
	# (3) fire up git. The starter already has a gitignore file and