fwhigh/aicamp_recipe.sh

## aicamp_recipe.sh
# End-to-end recipe: fetch metaflow-helper, install it into a fresh virtual
# environment, and run the model-selection example.
# The native packages (lightgbm, graphviz) are installed with Homebrew (`brew`).

# Make a directory you can blow away in full later
mkdir -p aicamp_demo && cd aicamp_demo

# Clone and pin the fwhigh/metaflow-helper git repo
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1

# Set up and activate a virtual environment
# Install the metaflow-helper package in editable mode, plus its dependencies
python -m venv venv && . venv/bin/activate
python -m pip install --upgrade pip
python -m pip install -e .
brew install lightgbm
python -m pip install -r example-requirements.txt

# Test runs and flow visualization
python examples/model-selection/train.py run --help
python examples/model-selection/train.py run --configuration test_randomized_config
# graphviz supplies the `dot` command that renders the flow graph to a PNG
brew install graphviz
python examples/model-selection/train.py output-dot | dot -Tpng -o model-selection-flow.png

# Full run
python examples/model-selection/train.py run --configuration randomized_config

## common.py
from importlib import import_module
import subprocess

def system_command_with_retry(cmd: list, max_attempts: int = 5):
    """Run ``cmd`` until it exits with status 0, with exponential backoff.

    Args:
        cmd: Argument list handed unchanged to ``subprocess.run``.
        max_attempts: How many times to run the command before giving up.
            Defaults to 5, the previously hard-coded value.

    Returns:
        None. Exhausting every attempt is reported on stdout, not raised.

    Raises:
        OSError: Propagated from ``subprocess.run`` if the command cannot be
            started (for example, the executable does not exist).
    """
    # ``time`` is not among this module's top-level imports, so bring it into
    # scope here; without it the first failed attempt died with a NameError.
    import time

    for attempt in range(max_attempts):
        # ``subprocess.run`` is called without ``check=True``, so a non-zero
        # exit shows up in ``returncode`` rather than as CalledProcessError.
        status = subprocess.run(cmd)
        if status.returncode == 0:
            return
        if attempt == max_attempts - 1:
            # No further attempt follows, so do not sleep or claim a retry.
            print(f'command status was {status}, giving up after {max_attempts} attempts')
            return
        wait_seconds = 2 ** attempt  # 1, 2, 4, 8, ... seconds
        print(f'command status was {status}, retrying after {wait_seconds} seconds')
        time.sleep(wait_seconds)


def install_dependencies(dependencies: list):
    """Pip-install every dependency whose module cannot be imported.

    Args:
        dependencies: List of dicts. Each key is a module name to try
            importing; its value is the requirement string passed to
            ``pip install`` when that import raises ModuleNotFoundError.

    Returns:
        None.
    """
    # Local import: ``sys`` is not among this module's top-level imports.
    import sys

    for dependency in dependencies:
        for module_name, requirement in dependency.items():
            try:
                import_module(module_name)
            except ModuleNotFoundError:
                # Run pip through the current interpreter so the package is
                # installed into the environment that failed the import,
                # not whichever ``pip`` happens to be first on PATH.
                system_command_with_retry(
                    [sys.executable, '-m', 'pip', 'install', requirement]
                )

## debug.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              debug.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## model_selection_contenders_spec.py
from scipy.stats import randint, loguniform

# Model-selection search space: one dict per contender. List values are
# discrete choices; scipy.stats objects are distributions to sample from.
contenders_spec = []

# Contender: LightGBM regressor.
contenders_spec.append({
    # Algorithm to use
    '__model': ['metaflow_helper.models.LightGBMRegressor'],
    # Passed to the model initializer
    '__model__init_kwargs__learning_rate': loguniform(1e-2, 1e-1),
    # scipy's randint excludes the upper bound, so this samples depths 1-3
    '__model__init_kwargs__max_depth': randint(1, 4),
    '__model__init_kwargs__n_estimators': [10_000],
    # Passed to the model fitter
    '__model__fit_kwargs__eval_metric': ['mse'],
    '__model__fit_kwargs__early_stopping_rounds': [10],
    '__model__fit_kwargs__verbose': [0],
    # Having this key present switches the contender to randomized search
    '__n_iter': 5,
})

# Contender: Keras regressor.
contenders_spec.append({
    # Algorithm to use
    '__model': ['metaflow_helper.models.KerasRegressor'],
    # Passed to the model initializer
    '__model__init_kwargs__build_model': ['metaflow_helper.models.build_keras_regression_model'],
    '__model__init_kwargs__metric': ['mse'],
    '__model__init_kwargs__dense_layer_widths': [(), (15,), (15, 15,), (15 * 15,)],
    '__model__init_kwargs__l1_factor': loguniform(1e-8, 1e-2),
    '__model__init_kwargs__l2_factor': loguniform(1e-8, 1e-2),
    # Passed to the model fitter
    '__model__fit_kwargs__batch_size': [None],
    '__model__fit_kwargs__epochs': [10_000],
    '__model__fit_kwargs__validation_split': [0.2],
    '__model__fit_kwargs__monitor': ['val_mse'],
    '__model__fit_kwargs__verbose': [0],
    '__model__fit_kwargs__patience': [10],
    '__model__fit_kwargs__min_delta': [0.1],
    # Having this key present switches the contender to randomized search
    '__n_iter': 5,
})

## model_selection_quickstart_install.sh
# Quickstart install: create a virtual environment and install
# metaflow-helper from a pinned checkout of the repository.

# Create the virtual environment and activate it in the current shell
python -m venv metaflow-helper-venv && . metaflow-helper-venv/bin/activate

# checkout a tagged commit
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# the package is also available via `pip install metaflow-helper==0.0.1`
# Upgrade pip, then install the checked-out package in editable mode (-e)
python -m pip install --upgrade pip
python -m pip install -e .

## model_selection_quickstart_train_run.sh
# Install the requirements listed for the examples
python -m pip install -r example-requirements.txt

# Show the options of the `run` command; the part of the help output that
# lists the available configurations is reproduced below
python examples/model-selection/train.py run --help
#  --configuration TEXT      Which config.py file to use. Available configs:
#                            randomized_config (default),
#                            test_randomized_config, grid_config,
#                            test_grid_config  [default: randomized_config]

# Run the flow with the test_randomized_config configuration
python examples/model-selection/train.py run --configuration test_randomized_config

## test_mode.py
from metaflow import FlowSpec, Parameter, step
import common


class Train(FlowSpec):
    """Flow whose ``test_mode`` parameter selects a small or a full setup.

    ``start`` stores ``df``, ``epochs`` and ``patience`` on the run. The same
    attribute names are set in both modes so later code can read them
    without knowing which mode is active.
    """

    test_mode = Parameter(
        'test_mode',
        help="Run in test mode?",
        type=bool,
        default=False,
    )

    @step
    def start(self):
        """Load the data and pick training limits according to ``test_mode``."""
        if self.test_mode:
            # Get a subset of data and reduce parallelism here
            self.df = common.get_dataframe(max_rows=100)
            # Same attribute name as the non-test branch (was ``max_epochs``,
            # which left ``epochs`` undefined in test mode).
            self.epochs = 10
            self.patience = 1
        else:
            self.df = common.get_dataframe()
            self.epochs = 10_000
            self.patience = 50
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step; nothing to do."""
        pass


if __name__ == '__main__':
    Train()

## train.py
from metaflow import FlowSpec, step
import common

class Train(FlowSpec):
    """Two-step flow: ``start`` loads the data, ``end`` closes the run."""

    @step
    def start(self):
        """Keep the result of ``common.get_df()`` on the run as ``df``."""
        frame = common.get_df()
        self.df = frame
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step; nothing to do."""
        pass


if __name__ == '__main__':
    Train()
	# End-to-end recipe: fetch metaflow-helper, install it into a fresh virtual
	# environment, and run the model-selection example.
	# The native packages (lightgbm, graphviz) are installed with Homebrew (`brew`).

	# Make a directory you can blow away in full later
	mkdir -p aicamp_demo && cd aicamp_demo

	# Clone and pin the fwhigh/metaflow-helper git repo
	git clone https://github.com/fwhigh/metaflow-helper.git
	cd metaflow-helper
	git checkout v0.0.1

	# Set up and activate a virtual environment
	# Install the metaflow-helper package in editable mode, plus its dependencies
	python -m venv venv && . venv/bin/activate
	python -m pip install --upgrade pip
	python -m pip install -e .
	brew install lightgbm
	python -m pip install -r example-requirements.txt

	# Test runs and flow visualization
	python examples/model-selection/train.py run --help
	python examples/model-selection/train.py run --configuration test_randomized_config
	# graphviz supplies the `dot` command that renders the flow graph to a PNG
	brew install graphviz
	# The pipe must be unescaped: `\|` would pass a literal "|" argument to
	# train.py instead of piping its output into dot.
	python examples/model-selection/train.py output-dot | dot -Tpng -o model-selection-flow.png

	# Full run
	python examples/model-selection/train.py run --configuration randomized_config
	from importlib import import_module
	import subprocess

	def system_command_with_retry(cmd: list, max_attempts: int = 5):
	    """Run ``cmd`` until it exits with status 0, with exponential backoff.

	    Args:
	        cmd: Argument list handed unchanged to ``subprocess.run``.
	        max_attempts: How many times to run the command before giving up.
	            Defaults to 5, the previously hard-coded value.

	    Returns:
	        None. Exhausting every attempt is reported on stdout, not raised.

	    Raises:
	        OSError: Propagated from ``subprocess.run`` if the command cannot be
	            started (for example, the executable does not exist).
	    """
	    # ``time`` is not among this module's top-level imports, so bring it into
	    # scope here; without it the first failed attempt died with a NameError.
	    import time

	    for attempt in range(max_attempts):
	        # ``subprocess.run`` is called without ``check=True``, so a non-zero
	        # exit shows up in ``returncode`` rather than as CalledProcessError.
	        status = subprocess.run(cmd)
	        if status.returncode == 0:
	            return
	        if attempt == max_attempts - 1:
	            # No further attempt follows, so do not sleep or claim a retry.
	            print(f'command status was {status}, giving up after {max_attempts} attempts')
	            return
	        wait_seconds = 2 ** attempt  # 1, 2, 4, 8, ... seconds
	        print(f'command status was {status}, retrying after {wait_seconds} seconds')
	        time.sleep(wait_seconds)


	def install_dependencies(dependencies: list):
	    """Pip-install every dependency whose module cannot be imported.

	    Args:
	        dependencies: List of dicts. Each key is a module name to try
	            importing; its value is the requirement string passed to
	            ``pip install`` when that import raises ModuleNotFoundError.

	    Returns:
	        None.
	    """
	    # Local import: ``sys`` is not among this module's top-level imports.
	    import sys

	    for dependency in dependencies:
	        for module_name, requirement in dependency.items():
	            try:
	                import_module(module_name)
	            except ModuleNotFoundError:
	                # Run pip through the current interpreter so the package is
	                # installed into the environment that failed the import,
	                # not whichever ``pip`` happens to be first on PATH.
	                system_command_with_retry(
	                    [sys.executable, '-m', 'pip', 'install', requirement]
	                )
	from scipy.stats import randint, loguniform

	# Model-selection search space: one dict per contender. List values are
	# discrete choices; scipy.stats objects are distributions to sample from.
	contenders_spec = []

	# Contender: LightGBM regressor.
	contenders_spec.append({
	    # Algorithm to use
	    '__model': ['metaflow_helper.models.LightGBMRegressor'],
	    # Passed to the model initializer
	    '__model__init_kwargs__learning_rate': loguniform(1e-2, 1e-1),
	    # scipy's randint excludes the upper bound, so this samples depths 1-3
	    '__model__init_kwargs__max_depth': randint(1, 4),
	    '__model__init_kwargs__n_estimators': [10_000],
	    # Passed to the model fitter
	    '__model__fit_kwargs__eval_metric': ['mse'],
	    '__model__fit_kwargs__early_stopping_rounds': [10],
	    '__model__fit_kwargs__verbose': [0],
	    # Having this key present switches the contender to randomized search
	    '__n_iter': 5,
	})

	# Contender: Keras regressor.
	contenders_spec.append({
	    # Algorithm to use
	    '__model': ['metaflow_helper.models.KerasRegressor'],
	    # Passed to the model initializer
	    '__model__init_kwargs__build_model': ['metaflow_helper.models.build_keras_regression_model'],
	    '__model__init_kwargs__metric': ['mse'],
	    '__model__init_kwargs__dense_layer_widths': [(), (15,), (15, 15,), (15 * 15,)],
	    '__model__init_kwargs__l1_factor': loguniform(1e-8, 1e-2),
	    '__model__init_kwargs__l2_factor': loguniform(1e-8, 1e-2),
	    # Passed to the model fitter
	    '__model__fit_kwargs__batch_size': [None],
	    '__model__fit_kwargs__epochs': [10_000],
	    '__model__fit_kwargs__validation_split': [0.2],
	    '__model__fit_kwargs__monitor': ['val_mse'],
	    '__model__fit_kwargs__verbose': [0],
	    '__model__fit_kwargs__patience': [10],
	    '__model__fit_kwargs__min_delta': [0.1],
	    # Having this key present switches the contender to randomized search
	    '__n_iter': 5,
	})
	# Quickstart: create a virtual environment, install metaflow-helper from a
	# pinned checkout, then run the model-selection example in test mode.

	# Create the virtual environment and activate it in the current shell
	python -m venv metaflow-helper-venv && . metaflow-helper-venv/bin/activate

	# checkout a tagged commit
	git clone https://github.com/fwhigh/metaflow-helper.git
	cd metaflow-helper
	git checkout v0.0.1
	# the package is also available via `pip install metaflow-helper==0.0.1`
	# Upgrade pip, then install the checked-out package in editable mode (-e)
	python -m pip install --upgrade pip
	python -m pip install -e .
	# Install the requirements listed for the examples
	python -m pip install -r example-requirements.txt

	# Show the options of the `run` command; the part of the help output that
	# lists the available configurations is reproduced below
	python examples/model-selection/train.py run --help
	#  --configuration TEXT      Which config.py file to use. Available configs:
	#                            randomized_config (default),
	#                            test_randomized_config, grid_config,
	#                            test_grid_config  [default: randomized_config]

	# Run the flow with the test_randomized_config configuration
	python examples/model-selection/train.py run --configuration test_randomized_config
	from metaflow import FlowSpec, Parameter, step
	import common


	class Train(FlowSpec):
	    """Flow whose ``test_mode`` parameter selects a small or a full setup.

	    ``start`` stores ``df``, ``epochs`` and ``patience`` on the run. The same
	    attribute names are set in both modes so later code can read them
	    without knowing which mode is active.
	    """

	    test_mode = Parameter(
	        'test_mode',
	        help="Run in test mode?",
	        type=bool,
	        default=False,
	    )

	    @step
	    def start(self):
	        """Load the data and pick training limits according to ``test_mode``."""
	        if self.test_mode:
	            # Get a subset of data and reduce parallelism here
	            self.df = common.get_dataframe(max_rows=100)
	            # Same attribute name as the non-test branch (was ``max_epochs``,
	            # which left ``epochs`` undefined in test mode).
	            self.epochs = 10
	            self.patience = 1
	        else:
	            self.df = common.get_dataframe()
	            self.epochs = 10_000
	            self.patience = 50
	        # Do stuff here
	        self.next(self.end)

	    @step
	    def end(self):
	        """Final step; nothing to do."""
	        pass


	if __name__ == '__main__':
	    Train()
	from metaflow import FlowSpec, step
	import common

	class Train(FlowSpec):
	    """Two-step flow: ``start`` loads the data, ``end`` closes the run."""

	    @step
	    def start(self):
	        """Keep the result of ``common.get_df()`` on the run as ``df``."""
	        self.df = common.get_df()
	        # Do stuff here
	        self.next(self.end)

	    @step
	    def end(self):
	        """Final step; nothing to do."""
	        pass


	if __name__ == '__main__':
	    Train()