Skip to content

Instantly share code, notes, and snippets.

@fwhigh
Last active October 14, 2021 17:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fwhigh/c6f9c88cf94cedf2e96d6900ac0f1226 to your computer and use it in GitHub Desktop.
Save fwhigh/c6f9c88cf94cedf2e96d6900ac0f1226 to your computer and use it in GitHub Desktop.
Blog post: lightgbm-vs-keras-metaflow
# Make a directory you can blow away in full later
mkdir -p aicamp_demo && cd aicamp_demo
# Clone the fwhigh/metaflow-helper git repo and pin it to the v0.0.1 tag
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# Set up and activate a virtual environment
python -m venv venv && . venv/bin/activate
# Install the metaflow-helper package in editable mode, plus the example dependencies
python -m pip install --upgrade pip
python -m pip install -e .
# NOTE: `brew` presumably means Homebrew (macOS); use your platform's package manager otherwise
brew install lightgbm
python -m pip install -r example-requirements.txt
# Test runs and flow visualization
python examples/model-selection/train.py run --help
python examples/model-selection/train.py run --configuration test_randomized_config
# Graphviz provides the `dot` command used below to render the flow graph to a PNG
brew install graphviz
python examples/model-selection/train.py output-dot | dot -Tpng -o model-selection-flow.png
# Full run
python examples/model-selection/train.py run --configuration randomized_config
import subprocess
import sys
import time
from importlib import import_module
def system_command_with_retry(cmd: list, max_attempts: int = 5) -> bool:
    """Run a command, retrying with exponential backoff until it exits with 0.

    The command is executed with ``subprocess.run`` (no shell). After a
    non-zero exit status the function waits ``2 ** attempt`` seconds
    (1, 2, 4, ...) and tries again, up to ``max_attempts`` times in total.
    No wait happens after the final attempt, since nothing follows it.

    Args:
        cmd: The command and its arguments, e.g. ``['pip', 'install', 'x']``.
        max_attempts: Maximum number of times to run the command.

    Returns:
        ``True`` if some attempt exited with status 0, ``False`` if every
        attempt failed (or ``max_attempts`` is less than 1).
    """
    for attempt in range(max_attempts):
        # Without ``check=True`` a failing command does not raise; the failure
        # is reported through the return code only.
        status = subprocess.run(cmd)
        if status.returncode == 0:
            return True

        if attempt == max_attempts - 1:
            print(f'command status was {status}, giving up after {max_attempts} attempts')
            break

        wait_seconds = 2 ** attempt
        print(f'command status was {status}, retrying after {wait_seconds} seconds')
        time.sleep(wait_seconds)
    return False
def install_dependencies(dependencies: list):
    """Install any dependency whose module cannot be imported.

    Args:
        dependencies: A list of dicts. Each dict maps an importable module
            name to the requirement string handed to ``pip install`` when
            that module is missing.

    Each module is probed by importing it; only a ``ModuleNotFoundError``
    triggers an install. The install goes through
    ``system_command_with_retry`` and uses ``<this interpreter> -m pip`` so
    the package lands in the environment that is actually running, not
    whichever ``pip`` happens to be first on ``PATH``.
    """
    for dependency in dependencies:
        for module_name, requirement in dependency.items():
            try:
                import_module(module_name)
            except ModuleNotFoundError:
                system_command_with_retry(
                    [sys.executable, '-m', 'pip', 'install', requirement]
                )
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from scipy.stats import randint, loguniform
# LightGBM contender. List values and scipy distributions are the candidate
# settings to search over for each key.
_lightgbm_contender = {
    # Which model class to use
    '__model': ['metaflow_helper.models.LightGBMRegressor'],
    # Keyword arguments for the model initializer
    '__model__init_kwargs__learning_rate': loguniform(1e-2, 1e-1),
    '__model__init_kwargs__max_depth': randint(1, 4),
    '__model__init_kwargs__n_estimators': [10_000],
    # Keyword arguments for the model fitter
    '__model__fit_kwargs__eval_metric': ['mse'],
    '__model__fit_kwargs__early_stopping_rounds': [10],
    '__model__fit_kwargs__verbose': [0],
    # Having this key present switches on randomized search
    '__n_iter': 5,
}

# Keras contender, laid out the same way as the LightGBM one.
_keras_contender = {
    # Which model class to use
    '__model': ['metaflow_helper.models.KerasRegressor'],
    # Keyword arguments for the model initializer
    '__model__init_kwargs__build_model': [
        'metaflow_helper.models.build_keras_regression_model',
    ],
    '__model__init_kwargs__metric': ['mse'],
    '__model__init_kwargs__dense_layer_widths': [
        (),
        (15,),
        (15, 15),
        (15 * 15,),
    ],
    '__model__init_kwargs__l1_factor': loguniform(1e-8, 1e-2),
    '__model__init_kwargs__l2_factor': loguniform(1e-8, 1e-2),
    # Keyword arguments for the model fitter
    '__model__fit_kwargs__batch_size': [None],
    '__model__fit_kwargs__epochs': [10_000],
    '__model__fit_kwargs__validation_split': [0.2],
    '__model__fit_kwargs__monitor': ['val_mse'],
    '__model__fit_kwargs__verbose': [0],
    '__model__fit_kwargs__patience': [10],
    '__model__fit_kwargs__min_delta': [0.1],
    # Having this key present switches on randomized search
    '__n_iter': 5,
}

# The models that compete against each other during model selection.
contenders_spec = [_lightgbm_contender, _keras_contender]
# Create and activate a virtual environment
python -m venv metaflow-helper-venv && . metaflow-helper-venv/bin/activate
# Clone the repo and check out a tagged commit
git clone https://github.com/fwhigh/metaflow-helper.git
cd metaflow-helper
git checkout v0.0.1
# The package is also available via `pip install metaflow-helper==0.0.1`
python -m pip install --upgrade pip
# Install the package in editable mode, then the example requirements
python -m pip install -e .
python -m pip install -r example-requirements.txt
# Show the run options; the relevant part of the help output is reproduced below
python examples/model-selection/train.py run --help
# --configuration TEXT Which config.py file to use. Available configs:
# randomized_config (default),
# test_randomized_config, grid_config,
# test_grid_config [default: randomized_config]
# Run the flow with the test_randomized_config configuration
python examples/model-selection/train.py run --configuration test_randomized_config
from metaflow import FlowSpec, Parameter, step

import common


class Train(FlowSpec):
    """Example training flow with a switchable test mode.

    When the ``test_mode`` parameter is true, ``start`` loads at most 100
    rows and uses small ``epochs`` / ``patience`` values; otherwise it loads
    the full dataframe with the full-size values. Both branches set the same
    attribute names (``df``, ``epochs``, ``patience``) so later steps can
    read them regardless of mode.
    """

    test_mode = Parameter(
        'test_mode',
        help="Run in test mode?",
        type=bool,
        default=False,
    )

    @step
    def start(self):
        """Load the data and choose training limits based on ``test_mode``."""
        if self.test_mode:
            # Get a subset of data and reduce parallelism here
            self.df = common.get_dataframe(max_rows=100)
            self.epochs = 10
            self.patience = 1
        else:
            self.df = common.get_dataframe()
            self.epochs = 10_000
            self.patience = 50
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step of the flow; intentionally does nothing."""
        pass


if __name__ == '__main__':
    Train()
from metaflow import FlowSpec, step
import common
class Train(FlowSpec):
    """Minimal two-step example flow: load a dataframe in ``start``, then ``end``."""

    @step
    def start(self):
        """Load the dataframe and hand off to the ``end`` step."""
        # NOTE(review): uses ``get_dataframe`` to match the test-mode variant
        # of this flow; confirm against the actual ``common`` module.
        self.df = common.get_dataframe()
        # Do stuff here
        self.next(self.end)

    @step
    def end(self):
        """Final step of the flow; intentionally does nothing."""
        pass


if __name__ == '__main__':
    Train()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment