Skip to content

Instantly share code, notes, and snippets.

@fwhigh
Last active October 14, 2021 17:46
Embed
What would you like to do?
Blog post: lightgbm-vs-keras-metaflow
# End-to-end demo: set up metaflow-helper at a pinned tag and run the
# model-selection example flow.

# Make a directory you can blow away in full later
mkdir -p aicamp_demo && cd aicamp_demo
# Clone and pin the fwhigh/metaflow-helper git repo
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# Set up and activate a virtual environment
python -m venv venv && . venv/bin/activate
# Install the metaflow-helper package in editable mode, plus the example dependencies
python -m pip install --upgrade pip
python -m pip install -e .
# NOTE: the `brew` steps below require Homebrew to be installed
brew install lightgbm
python -m pip install -r example-requirements.txt
# Test runs and flow visualization
python examples/model-selection/train.py run --help
python examples/model-selection/train.py run --configuration test_randomized_config
# Graphviz provides the `dot` command used to render the flow graph as a PNG
brew install graphviz
python examples/model-selection/train.py output-dot | dot -Tpng -o model-selection-flow.png
# Full run
python examples/model-selection/train.py run --configuration randomized_config
from importlib import import_module
import subprocess
def system_command_with_retry(cmd: list, max_attempts: int = 5):
    """Run *cmd* as a subprocess, retrying with exponential backoff on failure.

    The command is attempted up to ``max_attempts`` times. After a failed
    attempt number ``i`` (0-based) the function waits ``2 ** i`` seconds
    before trying again. No wait happens after the final attempt.

    Args:
        cmd: Argument list passed straight to ``subprocess.run``.
        max_attempts: Maximum number of times to run the command.

    Returns:
        True if an attempt exited with return code 0, otherwise False.
    """
    # Imported locally so the function works even though the surrounding
    # module only imports ``subprocess`` and ``importlib``.
    import time

    for attempt in range(max_attempts):
        wait_seconds = 2 ** attempt
        try:
            status = subprocess.run(cmd)
        except subprocess.CalledProcessError:
            # Kept for compatibility; subprocess.run only raises this when
            # called with check=True.
            failure = 'command failed'
        else:
            if status.returncode == 0:
                return True
            failure = f'command status was {status}'

        if attempt + 1 < max_attempts:
            print(f'{failure}, retrying after {wait_seconds} seconds')
            time.sleep(wait_seconds)
        else:
            # Nothing left to retry, so do not sleep pointlessly.
            print(f'{failure}, giving up after {max_attempts} attempts')
    return False
def install_dependencies(dependencies: list):
    """Install any dependency whose module cannot currently be imported.

    Args:
        dependencies: A list of dicts. Each dict maps an importable module
            name to the requirement string handed to ``pip install`` when
            that module is missing.
    """
    # Local import: the surrounding module does not import ``sys``.
    import sys

    for dependency in dependencies:
        for module_name, requirement in dependency.items():
            try:
                import_module(module_name)
            except ModuleNotFoundError:
                # Invoke pip through the running interpreter so the package
                # lands in this environment rather than whichever ``pip``
                # happens to be first on PATH.
                system_command_with_retry(
                    [sys.executable, '-m', 'pip', 'install', requirement]
                )
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from scipy.stats import randint, loguniform
# Search space for model selection: one dict per candidate algorithm.
# List values and scipy distributions are the candidates for each setting.
contenders_spec = [
    # --- Contender 1: LightGBM regressor ---
    {
        # Dotted path of the model class
        '__model': ['metaflow_helper.models.LightGBMRegressor'],

        # Keyword arguments for the model initializer
        '__model__init_kwargs__learning_rate': loguniform(1e-2, 1e-1),
        '__model__init_kwargs__max_depth': randint(1, 4),
        '__model__init_kwargs__n_estimators': [10_000],

        # Keyword arguments for the model fitter
        '__model__fit_kwargs__eval_metric': ['mse'],
        '__model__fit_kwargs__early_stopping_rounds': [10],
        '__model__fit_kwargs__verbose': [0],

        # Having this key present switches on randomized search
        '__n_iter': 5,
    },
    # --- Contender 2: Keras regressor ---
    {
        # Dotted path of the model class
        '__model': ['metaflow_helper.models.KerasRegressor'],

        # Keyword arguments for the model initializer
        '__model__init_kwargs__build_model': ['metaflow_helper.models.build_keras_regression_model'],
        '__model__init_kwargs__metric': ['mse'],
        '__model__init_kwargs__dense_layer_widths': [(), (15,), (15, 15,), (15 * 15,)],
        '__model__init_kwargs__l1_factor': loguniform(1e-8, 1e-2),
        '__model__init_kwargs__l2_factor': loguniform(1e-8, 1e-2),

        # Keyword arguments for the model fitter
        '__model__fit_kwargs__batch_size': [None],
        '__model__fit_kwargs__epochs': [10_000],
        '__model__fit_kwargs__validation_split': [0.2],
        '__model__fit_kwargs__monitor': ['val_mse'],
        '__model__fit_kwargs__verbose': [0],
        '__model__fit_kwargs__patience': [10],
        '__model__fit_kwargs__min_delta': [0.1],

        # Having this key present switches on randomized search
        '__n_iter': 5,
    },
]
# Quick start: create a virtual environment, install metaflow-helper at a
# pinned tag, and run the model-selection example in its test configuration.

# create and activate a virtual environment
python -m venv metaflow-helper-venv && . metaflow-helper-venv/bin/activate
# check out a tagged commit
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# the package is also available via `pip install metaflow-helper==0.0.1`
python -m pip install --upgrade pip
# install the package in editable mode, then the example dependencies
python -m pip install -e .
python -m pip install -r example-requirements.txt
# list the run options; the relevant part of the help output is:
python examples/model-selection/train.py run --help
# --configuration TEXT Which config.py file to use. Available configs:
# randomized_config (default),
# test_randomized_config, grid_config,
# test_grid_config [default: randomized_config]
# run the flow with the test randomized-search configuration
python examples/model-selection/train.py run --configuration test_randomized_config
from metaflow import FlowSpec, Parameter, step
import common


class Train(FlowSpec):
    """Training flow skeleton with a switchable test mode.

    When ``test_mode`` is true, ``start`` loads at most 100 rows and uses
    small ``epochs`` / ``patience`` values; otherwise it loads the full
    dataframe and uses the full-size settings.
    """

    test_mode = Parameter(
        'test_mode',
        help="Run in test mode?",
        type=bool,
        default=False,
    )

    @step
    def start(self):
        """Load the data and set the training budget, then go to ``end``."""
        if self.test_mode:
            # Get a subset of data and reduce parallelism here
            self.df = common.get_dataframe(max_rows=100)
            # Same attribute name as the non-test branch, so later code
            # reads one consistent setting in both modes.
            self.epochs = 10
            self.patience = 1
        else:
            self.df = common.get_dataframe()
            self.epochs = 10_000
            self.patience = 50
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Terminal step; does nothing."""
        pass


if __name__ == '__main__':
    Train()
from metaflow import FlowSpec, step
import common
class Train(FlowSpec):
    """Minimal two-step training flow: ``start`` loads data, ``end`` finishes."""

    @step
    def start(self):
        """Load the dataframe onto the flow, then hand off to ``end``."""
        # NOTE(review): the test-mode variant of this flow calls
        # common.get_dataframe rather than common.get_df. Confirm which
        # helper the common module actually provides.
        self.df = common.get_df()
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Terminal step; does nothing."""


if __name__ == '__main__':
    Train()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment