# zilto/hamilton_experiment_driver.py Secret

## hamilton_experiment_driver.py
from hamilton import driver
from hamilton.io.materialization import to
from hamilton.plugins import h_experiments, pandas_extensions
import analysis  # <- your module

# Experiment-tracking hook, set up with the experiment name and the
# base directory given below.
tracker_hook = h_experiments.ExperimentTracker(
    base_directory="./experiments",
    experiment_name="hello-world",
)

# Assemble the driver from the `analysis` module and attach the tracker
# hook as an adapter.
dr = (
    driver.Builder()
    .with_modules(analysis)
    .with_config({"model": "linear", "preprocess": "pca"})
    .with_adapters(tracker_hook)
    .build()
)

# Inputs shared by the visualization call and the actual run.
inputs = {"n_splits": 4}

# Outputs to save for the run. Paths stay relative so the files are
# included in the run directory.
materializers = [
    to.pickle(
        path="./trained_model.pickle",
        dependencies=["trained_model"],
        id="trained_model__pickle",
    ),
    to.parquet(
        path="./X_df.parquet",
        dependencies=["X_df"],
        id="X_df__parquet",
    ),
    to.pickle(
        path="./out_of_sample_performance.pickle",
        dependencies=["out_of_sample_performance"],
        id="out_of_sample_performance__pickle",
    ),
]

# `tracker_hook.run_directory` is passed explicitly so the dataflow
# visualization is stored there.
dr.visualize_materialization(
    *materializers,
    output_file_path=f"{tracker_hook.run_directory}/dag.png",
    inputs=inputs,
)

# Launch the run through .materialize().
dr.materialize(*materializers, inputs=inputs)
	from hamilton import driver
	from hamilton.io.materialization import to
	from hamilton.plugins import h_experiments, pandas_extensions
	import analysis # <- your module

	# NOTE(review): this tab-indented section repeats the same driver setup as
	# the unindented script earlier in this file -- looks like a paste
	# artifact; confirm which copy is meant to be kept.

	# Tracker hook for the "hello-world" experiment, rooted at ./experiments.
	tracker_hook = h_experiments.ExperimentTracker(
		experiment_name="hello-world",
		base_directory="./experiments",
	)

	# Driver built over the `analysis` module with the hook attached.
	dr = (
		driver.Builder()
		.with_modules(analysis)
		.with_config({"model": "linear", "preprocess": "pca"})
		.with_adapters(tracker_hook)  # <- the hook
		.build()
	)

	inputs = {"n_splits": 4}

	# Materializers to include in the run directory; each uses a relative path.
	materializers = [
		to.pickle(
			id="trained_model__pickle",
			dependencies=["trained_model"],
			path="./trained_model.pickle",
		),
		to.parquet(
			id="X_df__parquet",
			dependencies=["X_df"],
			path="./X_df.parquet",
		),
		to.pickle(
			id="out_of_sample_performance__pickle",
			dependencies=["out_of_sample_performance"],
			path="./out_of_sample_performance.pickle",
		),
	]

	# Store the dataflow visualization by passing
	# `tracker_hook.run_directory` explicitly.
	dr.visualize_materialization(
		*materializers,
		inputs=inputs,
		output_file_path=f"{tracker_hook.run_directory}/dag.png",
	)

	# Start the run with .materialize().
	dr.materialize(*materializers, inputs=inputs)