Dylan Valerio krsnewwave

## pytorch_multitask_paintings.py
# following https://towardsdatascience.com/multilabel-classification-with-pytorch-in-5-minutes-a4fa8993cbc7
class LightningResNetMultiLabel(pl.LightningModule):
    def __init__(self, net, n_period, n_artists, criterion = F.cross_entropy, optimizer = None, scheduler = None, dropout_p = 0., lr=0.001, freeze_net=False):
        super().__init__()
        self.net = net

        self.feature_extractor = nn.Sequential(*(list(self.net.children())[:-1]))

        if freeze_net:
            for param in self.net.parameters():

## pipeline_registry.py
# in <root>/src/<project>/pipeline_registry.py

def register_pipelines() -> Dict[str, Pipeline]:
    data_engineering_pipeline = de.create_pipeline()
    xgb_pipe = ds.create_xgb_pipeline()
    rr_pipe = ds.create_rr_pipeline()
    logres_pipe = ds.create_logres_pipeline()
    rr_ho_pipe = ds.create_rr_ho_pipeline()

    return {

## pipeline.py
# in <root>/src/<project>/pipelines/data_science/pipeline.py

from kedro.pipeline import node, pipeline
from .nodes import split_data, fit_xgboost

def create_plot_roc_node():
    return node(
        func=plot_roc,
        inputs=["clf", "X_test", "y_test"],
        outputs="roc_graph",

## nodes_with_optuna.py
# in <root>/src/<project>/pipelines/data_science/nodes.py

def rr_objective(X_train: pd.DataFrame, y_train: pd.Series,
              X_test: pd.DataFrame, y_test: pd.Series,
              trial: optuna.trial):
    max_depth = trial.suggest_int("max_depth", 8, 64, log=True)
    min_samples_split = trial.suggest_int("min_samples_split", 50, 1000, )
    ccp_alpha = trial.suggest_float("ccp_alpha", 0.001, 0.03, log=True)
    rr_clf = RandomForestClassifier(max_depth=max_depth,
                                    min_samples_split=min_samples_split,

## catalog.yaml
# in <root>/conf/base/catalog.yaml

# Catalog entry `insurance`: a CSV dataset handled by pandas.CSVDataSet.
# The file lives under data/01_raw/ and the entry is tagged with layer "raw".
# NOTE(review): the filepath is relative — presumably resolved against the
# project root; confirm against how the project is launched.
insurance:
  type: pandas.CSVDataSet
  filepath: data/01_raw/train.csv
  layer: raw

# Catalog entry `model_input_table`: a Parquet dataset handled by
# pandas.ParquetDataSet, stored under data/03_primary/.
# NOTE(review): no `layer` key is visible for this entry here, unlike
# `insurance`; confirm whether one (e.g. a primary layer) is intended.
model_input_table:
  type: pandas.ParquetDataSet
  filepath: data/03_primary/model_input_table.pq
	# following https://towardsdatascience.com/multilabel-classification-with-pytorch-in-5-minutes-a4fa8993cbc7
	class LightningResNetMultiLabel(pl.LightningModule):
	def __init__(self, net, n_period, n_artists, criterion = F.cross_entropy, optimizer = None, scheduler = None, dropout_p = 0., lr=0.001, freeze_net=False):
	super().__init__()
	self.net = net

	self.feature_extractor = nn.Sequential(*(list(self.net.children())[:-1]))

	if freeze_net:
	for param in self.net.parameters():
	# in <root>/src/<project>/pipeline_registry.py

	def register_pipelines() -> Dict[str, Pipeline]:
	data_engineering_pipeline = de.create_pipeline()
	xgb_pipe = ds.create_xgb_pipeline()
	rr_pipe = ds.create_rr_pipeline()
	logres_pipe = ds.create_logres_pipeline()
	rr_ho_pipe = ds.create_rr_ho_pipeline()

	return {
	# in <root>/src/<project>/pipelines/data_science/pipeline.py

	from kedro.pipeline import node, pipeline
	from .nodes import split_data, fit_xgboost

	def create_plot_roc_node():
	return node(
	func=plot_roc,
	inputs=["clf", "X_test", "y_test"],
	outputs="roc_graph",
	# in <root>/src/<project>/pipelines/data_science/nodes.py

	def rr_objective(X_train: pd.DataFrame, y_train: pd.Series,
	X_test: pd.DataFrame, y_test: pd.Series,
	trial: optuna.trial):
	max_depth = trial.suggest_int("max_depth", 8, 64, log=True)
	min_samples_split = trial.suggest_int("min_samples_split", 50, 1000, )
	ccp_alpha = trial.suggest_float("ccp_alpha", 0.001, 0.03, log=True)
	rr_clf = RandomForestClassifier(max_depth=max_depth,
	min_samples_split=min_samples_split,
	# in <root>/conf/base/catalog.yaml

	insurance:
	type: pandas.CSVDataSet
	filepath: data/01_raw/train.csv
	layer: raw

	model_input_table:
	type: pandas.ParquetDataSet
	filepath: data/03_primary/model_input_table.pq