Skip to content

Instantly share code, notes, and snippets.

@ronylpatil
Created January 11, 2024 09:56
Show Gist options
  • Save ronylpatil/e2af523b79895f4bc510f9906a43c840 to your computer and use it in GitHub Desktop.
Save ronylpatil/e2af523b79895f4bc510f9906a43c840 to your computer and use it in GitHub Desktop.
# file: dvc.yaml
# DVC pipeline definition. Four stages are chained through their deps/outs:
# load_dataset -> make_dataset -> train_model -> predict_model.
# Values written as ${section.key} are read from params.yaml.
stages:
  # Runs load_dataset.py and tracks the raw CSV it dumps as a stage output.
  load_dataset:
    cmd: python ./src/data/load_dataset.py
    deps:
      - ./src/data/load_dataset.py
    params:
      - data_source.drive
      - load_dataset.raw_data
      - load_dataset.file_name
    outs:
      # Raw data location, assembled from parameters in params.yaml.
      - .${load_dataset.raw_data}/${load_dataset.file_name}.csv

  # Runs make_dataset.py on the raw CSV; tracks train.csv and test.csv.
  make_dataset:
    cmd: python ./src/data/make_dataset.py
    deps:
      - ./src/data/make_dataset.py
      # Raw data dumped by the load_dataset stage.
      - .${load_dataset.raw_data}/${load_dataset.file_name}.csv
    params:
      - make_dataset.test_split
      - make_dataset.random_state
      - make_dataset.processed_data
      - load_dataset.raw_data
      - load_dataset.file_name
    outs:
      - .${make_dataset.processed_data}/train.csv
      - .${make_dataset.processed_data}/test.csv

  # Runs train_model.py on train.csv; tracks the serialized model.joblib.
  train_model:
    cmd: python ./src/models/train_model.py
    deps:
      # train.csv produced by the make_dataset stage.
      - .${make_dataset.processed_data}/train.csv
      - ./src/models/train_model.py
    params:
      - train_model.seed
      - train_model.n_estimators
      - train_model.max_depth
      - train_model.model_loc
      - make_dataset.processed_data
      - base.target_col
    outs:
      - .${train_model.model_loc}/model.joblib

  # Runs predict_model.py against test.csv using the trained model.
  # This stage declares no outs of its own.
  predict_model:
    cmd: python ./src/models/predict_model.py
    deps:
      - ./src/models/predict_model.py
      - .${make_dataset.processed_data}/test.csv
      # model.joblib generated by the train_model stage.
      - .${train_model.model_loc}/model.joblib
    params:
      - train_model.model_loc
      - make_dataset.processed_data
      - base.target_col

# Top-level (pipeline-wide) params, metrics and plots under dvclive/.
# NOTE(review): presumably these files are written by DVCLive from one of the
# model scripts above -- confirm which stage produces them.
params:
  - dvclive/params.yaml
metrics:
  - dvclive/metrics.json
plots:
  - dvclive/plots/metrics:
      x: step
# Note that each stage after the first consumes (depends on) the output of the previous stage; this is how the stages are stitched together to form a pipeline.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment