Skip to content

Instantly share code, notes, and snippets.

@zilto
Last active February 6, 2024 01:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zilto/25e205583c50da0e41457b2cd62d1a2d to your computer and use it in GitHub Desktop.
Save zilto/25e205583c50da0e41457b2cd62d1a2d to your computer and use it in GitHub Desktop.
from hamilton import driver
from hamilton.io.materialization import to
from hamilton.plugins import h_experiments, pandas_extensions
import analysis # <- your module
# Experiment-tracking hook, configured with an experiment name and the
# base directory passed to it.
tracker_hook = h_experiments.ExperimentTracker(
    base_directory="./experiments",
    experiment_name="hello-world",
)

# Build the driver from the `analysis` module with the chosen config and
# register the tracker as an adapter (this is what hooks it into the run).
dr = (
    driver.Builder()
    .with_modules(analysis)
    .with_config({"model": "linear", "preprocess": "pca"})
    .with_adapters(tracker_hook)
    .build()
)

# Runtime inputs handed to both the visualization and the execution calls.
inputs = {"n_splits": 4}

# Materializers whose outputs should be included in the run directory.
# Their paths are written as relative paths ("./...") on purpose.
materializers = [
    to.pickle(
        path="./trained_model.pickle",
        dependencies=["trained_model"],
        id="trained_model__pickle",
    ),
    to.parquet(
        path="./X_df.parquet",
        dependencies=["X_df"],
        id="X_df__parquet",
    ),
    to.pickle(
        path="./out_of_sample_performance.pickle",
        dependencies=["out_of_sample_performance"],
        id="out_of_sample_performance__pickle",
    ),
]

# The visualization path is NOT relative like the materializer paths:
# `tracker_hook.run_directory` is passed explicitly so the dataflow
# image is stored alongside the run.
dag_output_path = f"{tracker_hook.run_directory}/dag.png"
dr.visualize_materialization(
    *materializers,
    output_file_path=dag_output_path,
    inputs=inputs,
)

# Launch the run: execute the dataflow through the materializers.
dr.materialize(*materializers, inputs=inputs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment