@gregsheremeta
Created March 28, 2024 18:59
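
The file below is the compiled Kubeflow Pipelines (KFP) v2 spec for the Vertex AI AutoML Tabular pipeline. As a rough, illustrative sketch (not part of the original gist), a compiled spec like this would typically be submitted with the Vertex AI Python SDK; the project, region, bucket, and parameter values here are placeholders, and the required inputs mirror the pipeline's input list in the header comments of the spec.

    from google.cloud import aiplatform

    # Placeholder project/region/bucket -- substitute your own.
    aiplatform.init(project="my-project", location="us-central1")

    job = aiplatform.PipelineJob(
        display_name="automl-tabular",
        template_path="automl-tabular.yaml",          # this compiled spec
        pipeline_root="gs://my-bucket/pipeline_root",  # also passed as root_dir below
        parameter_values={
            # Required pipeline inputs (see the header comments in the spec);
            # all values shown are illustrative only.
            "project": "my-project",
            "location": "us-central1",
            "root_dir": "gs://my-bucket/pipeline_root",
            "target_column": "label",
            "prediction_type": "classification",
            "optimization_objective": "maximize-au-roc",
            "transformations": "gs://my-bucket/transformations.json",
            "train_budget_milli_node_hours": 1000.0,
        },
    )
    job.run()
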
automl-tabular.yaml
# PIPELINE DEFINITION
# Name: automl-tabular
# Description: The AutoML Tabular pipeline v1.
# Inputs:
# additional_experiments: dict
# cv_trainer_worker_pool_specs_override: list
# data_source_bigquery_table_path: str [Default: '']
# data_source_csv_filenames: str [Default: '']
# dataflow_service_account: str [Default: '']
# dataflow_subnetwork: str [Default: '']
# dataflow_use_public_ips: bool [Default: True]
# disable_early_stopping: bool [Default: False]
# distill_batch_predict_machine_type: str [Default: 'n1-standard-16']
# distill_batch_predict_max_replica_count: int [Default: 25.0]
# distill_batch_predict_starting_replica_count: int [Default: 25.0]
# enable_probabilistic_inference: bool [Default: False]
# encryption_spec_key_name: str [Default: '']
# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8']
# evaluation_batch_explain_max_replica_count: int [Default: 10.0]
# evaluation_batch_explain_starting_replica_count: int [Default: 10.0]
# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8']
# evaluation_batch_predict_max_replica_count: int [Default: 20.0]
# evaluation_batch_predict_starting_replica_count: int [Default: 20.0]
# evaluation_dataflow_disk_size_gb: int [Default: 50.0]
# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4']
# evaluation_dataflow_max_num_workers: int [Default: 100.0]
# evaluation_dataflow_starting_num_workers: int [Default: 10.0]
# export_additional_model_without_custom_ops: bool [Default: False]
# fast_testing: bool [Default: False]
# location: str
# model_description: str [Default: '']
# model_display_name: str [Default: '']
# optimization_objective: str
# optimization_objective_precision_value: float [Default: -1.0]
# optimization_objective_recall_value: float [Default: -1.0]
# parent_model: system.Artifact
# predefined_split_key: str [Default: '']
# prediction_type: str
# project: str
# quantiles: list
# root_dir: str
# run_distillation: bool [Default: False]
# run_evaluation: bool [Default: False]
# stage_1_num_parallel_trials: int [Default: 35.0]
# stage_1_tuner_worker_pool_specs_override: list
# stage_1_tuning_result_artifact_uri: str [Default: '']
# stage_2_num_parallel_trials: int [Default: 35.0]
# stage_2_num_selected_trials: int [Default: 5.0]
# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0]
# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16']
# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0]
# stratified_split_key: str [Default: '']
# study_spec_parameters_override: list
# target_column: str
# test_fraction: float [Default: -1.0]
# timestamp_split_key: str [Default: '']
# train_budget_milli_node_hours: float
# training_fraction: float [Default: -1.0]
# transform_dataflow_disk_size_gb: int [Default: 40.0]
# transform_dataflow_machine_type: str [Default: 'n1-standard-16']
# transform_dataflow_max_num_workers: int [Default: 25.0]
# transformations: str
# validation_fraction: float [Default: -1.0]
# vertex_dataset: system.Artifact
# weight_column: str [Default: '']
# Outputs:
# feature-attribution-2-feature_attributions: system.Metrics
# feature-attribution-3-feature_attributions: system.Metrics
# feature-attribution-feature_attributions: system.Metrics
# model-evaluation-2-evaluation_metrics: system.Metrics
# model-evaluation-3-evaluation_metrics: system.Metrics
# model-evaluation-evaluation_metrics: system.Metrics
components:
comp-automl-tabular-cv-trainer:
executorLabel: exec-automl-tabular-cv-trainer
inputDefinitions:
artifacts:
materialized_cv_splits:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized cross-validation splits.
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
tuning_result_input:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: AutoML Tabular tuning result.
parameters:
deadline_hours:
description: Number of hours the cross-validation trainer should run.
parameterType: NUMBER_DOUBLE
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the Cross-validation trainer.
parameterType: STRING
num_parallel_trials:
description: Number of parallel training trials.
parameterType: NUMBER_INTEGER
num_selected_features:
defaultValue: 0.0
description: 'Number of selected features. The number of
features to learn in the NN models.'
isOptional: true
parameterType: NUMBER_INTEGER
num_selected_trials:
description: 'Number of selected trials. The number of weak
learners in the final model is 5 * num_selected_trials.'
parameterType: NUMBER_INTEGER
project:
description: Project to run Cross-validation trainer.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
single_run_max_secs:
description: Max number of seconds each training trial runs.
parameterType: NUMBER_INTEGER
worker_pool_specs_override_json:
defaultValue: []
description: 'JSON worker pool specs. E.g.,
[{"machine_spec": {"machine_type":
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type":
"n1-standard-16"}}]'
isOptional: true
parameterType: LIST
outputDefinitions:
artifacts:
tuning_result_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The trained model and architectures.
parameters:
execution_metrics:
description: Core metrics of the component execution, as a dictionary.
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-cv-trainer-2:
executorLabel: exec-automl-tabular-cv-trainer-2
inputDefinitions:
artifacts:
materialized_cv_splits:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized cross-validation splits.
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
tuning_result_input:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: AutoML Tabular tuning result.
parameters:
deadline_hours:
description: Number of hours the cross-validation trainer should run.
parameterType: NUMBER_DOUBLE
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the Cross-validation trainer.
parameterType: STRING
num_parallel_trials:
description: Number of parallel training trials.
parameterType: NUMBER_INTEGER
num_selected_features:
defaultValue: 0.0
description: 'Number of selected features. The number of
features to learn in the NN models.'
isOptional: true
parameterType: NUMBER_INTEGER
num_selected_trials:
description: 'Number of selected trials. The number of weak
learners in the final model is 5 * num_selected_trials.'
parameterType: NUMBER_INTEGER
project:
description: Project to run Cross-validation trainer.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
single_run_max_secs:
description: Max number of seconds each training trial runs.
parameterType: NUMBER_INTEGER
worker_pool_specs_override_json:
defaultValue: []
description: 'JSON worker pool specs. E.g.,
[{"machine_spec": {"machine_type":
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type":
"n1-standard-16"}}]'
isOptional: true
parameterType: LIST
outputDefinitions:
artifacts:
tuning_result_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The trained model and architectures.
parameters:
execution_metrics:
description: Core metrics of the component execution, as a dictionary.
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-ensemble:
executorLabel: exec-automl-tabular-ensemble
inputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'The instance baseline
used to calculate explanations.'
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
tuning_result_input:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'AutoML Tabular tuning
result.'
warmup_data:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: 'The warm up data. The ensemble component will save the
warm up data together with the model artifact, which is used to warm up
the model when the prediction server starts.'
isOptional: true
parameters:
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
export_additional_model_without_custom_ops:
defaultValue: false
description: 'True to export
an additional model without custom TF operators to the
`model_without_custom_ops` output.'
isOptional: true
parameterType: BOOLEAN
location:
description: Location for running the ensemble job.
parameterType: STRING
project:
description: Project to run the ensemble job.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model.
model_architecture:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The architecture of the output model.
model_without_custom_ops:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model without custom TF operators; this output will
be empty unless `export_additional_model_without_custom_ops` is set.
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
parameters:
explanation_metadata:
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT
explanation_parameters:
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-ensemble-2:
executorLabel: exec-automl-tabular-ensemble-2
inputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'The instance baseline
used to calculate explanations.'
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
tuning_result_input:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'AutoML Tabular tuning
result.'
warmup_data:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: 'The warm up data. The ensemble component will save the
warm up data together with the model artifact, which is used to warm up
the model when the prediction server starts.'
isOptional: true
parameters:
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
export_additional_model_without_custom_ops:
defaultValue: false
description: 'True to export
an additional model without custom TF operators to the
`model_without_custom_ops` output.'
isOptional: true
parameterType: BOOLEAN
location:
description: Location for running the ensemble job.
parameterType: STRING
project:
description: Project to run the ensemble job.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model.
model_architecture:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The architecture of the output model.
model_without_custom_ops:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model without custom TF operators; this output will
be empty unless `export_additional_model_without_custom_ops` is set.
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
parameters:
explanation_metadata:
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT
explanation_parameters:
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-ensemble-3:
executorLabel: exec-automl-tabular-ensemble-3
inputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'The instance baseline
used to calculate explanations.'
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
tuning_result_input:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'AutoML Tabular tuning
result.'
warmup_data:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: 'The warm up data. The ensemble component will save the
warm up data together with the model artifact, which is used to warm up
the model when the prediction server starts.'
isOptional: true
parameters:
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
export_additional_model_without_custom_ops:
defaultValue: false
description: 'True to export
an additional model without custom TF operators to the
`model_without_custom_ops` output.'
isOptional: true
parameterType: BOOLEAN
location:
description: Location for running the ensemble job.
parameterType: STRING
project:
description: Project to run the ensemble job.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model.
model_architecture:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The architecture of the output model.
model_without_custom_ops:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The output model without custom TF operators; this output will
be empty unless `export_additional_model_without_custom_ops` is set.
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
parameters:
explanation_metadata:
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT
explanation_parameters:
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-finalizer:
executorLabel: exec-automl-tabular-finalizer
inputDefinitions:
parameters:
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the finalizer.
parameterType: STRING
project:
description: Project to run the finalizer.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
parameters:
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-infra-validator:
executorLabel: exec-automl-tabular-infra-validator
inputDefinitions:
artifacts:
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'google.UnmanagedContainerModel for model
to be validated.'
comp-automl-tabular-infra-validator-2:
executorLabel: exec-automl-tabular-infra-validator-2
inputDefinitions:
artifacts:
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'google.UnmanagedContainerModel for model
to be validated.'
comp-automl-tabular-infra-validator-3:
executorLabel: exec-automl-tabular-infra-validator-3
inputDefinitions:
artifacts:
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'google.UnmanagedContainerModel for model
to be validated.'
comp-automl-tabular-stage-1-tuner:
executorLabel: exec-automl-tabular-stage-1-tuner
inputDefinitions:
artifacts:
feature_ranking:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
materialized_eval_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized eval split.
materialized_train_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'The materialized train
split.'
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
parameters:
deadline_hours:
description: 'Number of hours the stage 1 tuner
should run.'
parameterType: NUMBER_DOUBLE
disable_early_stopping:
defaultValue: false
description: 'True to disable early stopping. Default
value is false.'
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the stage 1 tuner.
parameterType: STRING
num_parallel_trials:
description: Number of parallel training trials.
parameterType: NUMBER_INTEGER
num_selected_features:
defaultValue: 0.0
description: 'Number of selected features. The number of
features to learn in the NN models.'
isOptional: true
parameterType: NUMBER_INTEGER
num_selected_trials:
description: 'Number of selected trials. The number of weak
learners in the final model is 5 * num_selected_trials.'
parameterType: NUMBER_INTEGER
project:
description: Project to run the stage 1 tuner.
parameterType: STRING
reduce_search_space_mode:
defaultValue: regular
description: 'The reduce search space mode. Possible
values: "regular" (default), "minimal", "full".'
isOptional: true
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
run_distillation:
defaultValue: false
description: 'True if in distillation mode. The default value
is false.'
isOptional: true
parameterType: BOOLEAN
single_run_max_secs:
description: Max number of seconds each training trial runs.
parameterType: NUMBER_INTEGER
study_spec_parameters_override:
defaultValue: []
description: 'JSON study spec. E.g.,
[{"parameter_id": "model_type","categorical_value_spec": {"values":
["nn"]}}]'
isOptional: true
parameterType: LIST
tune_feature_selection_rate:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
worker_pool_specs_override_json:
defaultValue: []
description: 'JSON worker pool specs. E.g.,
[{"machine_spec": {"machine_type":
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type":
"n1-standard-16"}}]'
isOptional: true
parameterType: LIST
outputDefinitions:
artifacts:
tuning_result_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The trained model and architectures.
parameters:
execution_metrics:
description: Core metrics of the component execution, as a dictionary.
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-stage-1-tuner-2:
executorLabel: exec-automl-tabular-stage-1-tuner-2
inputDefinitions:
artifacts:
feature_ranking:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
materialized_eval_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized eval split.
materialized_train_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'The materialized train
split.'
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
parameters:
deadline_hours:
description: 'Number of hours the stage 1 tuner
should run.'
parameterType: NUMBER_DOUBLE
disable_early_stopping:
defaultValue: false
description: 'True to disable early stopping. Default
value is false.'
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the stage 1 tuner.
parameterType: STRING
num_parallel_trials:
description: Number of parallel training trials.
parameterType: NUMBER_INTEGER
num_selected_features:
defaultValue: 0.0
description: 'Number of selected features. The number of
features to learn in the NN models.'
isOptional: true
parameterType: NUMBER_INTEGER
num_selected_trials:
description: 'Number of selected trials. The number of weak
learners in the final model is 5 * num_selected_trials.'
parameterType: NUMBER_INTEGER
project:
description: Project to run the stage 1 tuner.
parameterType: STRING
reduce_search_space_mode:
defaultValue: regular
description: 'The reduce search space mode. Possible
values: "regular" (default), "minimal", "full".'
isOptional: true
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
run_distillation:
defaultValue: false
description: 'True if in distillation mode. The default value
is false.'
isOptional: true
parameterType: BOOLEAN
single_run_max_secs:
description: Max number of seconds each training trial runs.
parameterType: NUMBER_INTEGER
study_spec_parameters_override:
defaultValue: []
description: 'JSON study spec. E.g.,
[{"parameter_id": "model_type","categorical_value_spec": {"values":
["nn"]}}]'
isOptional: true
parameterType: LIST
tune_feature_selection_rate:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
worker_pool_specs_override_json:
defaultValue: []
description: 'JSON worker pool specs. E.g.,
[{"machine_spec": {"machine_type":
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type":
"n1-standard-16"}}]'
isOptional: true
parameterType: LIST
outputDefinitions:
artifacts:
tuning_result_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The trained model and architectures.
parameters:
execution_metrics:
description: Core metrics of the component execution, as a dictionary.
parameterType: STRUCT
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-transform:
executorLabel: exec-automl-tabular-transform
inputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The eval split.
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
test_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The test split.
train_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The train split.
parameters:
dataflow_disk_size_gb:
defaultValue: 40.0
description: 'The disk size, in gigabytes, to use
on each Dataflow worker instance. If not set, default to 40.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-16
description: 'The machine type used for dataflow
jobs. If not set, default to n1-standard-16.'
isOptional: true
parameterType: STRING
dataflow_max_num_workers:
defaultValue: 25.0
description: 'The number of workers to run the
dataflow job. If not set, default to 25.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Custom service account to run
dataflow jobs.'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More
details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the transform job.
parameterType: STRING
project:
description: Project to run the transform job.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
artifacts:
materialized_eval_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized eval split.
materialized_test_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
materialized_train_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized train split.
training_schema_uri:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The training schema.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
parameters:
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-automl-tabular-transform-2:
executorLabel: exec-automl-tabular-transform-2
inputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The eval split.
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
test_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The test split.
train_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The train split.
parameters:
dataflow_disk_size_gb:
defaultValue: 40.0
description: 'The disk size, in gigabytes, to use
on each Dataflow worker instance. If not set, default to 40.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-16
description: 'The machine type used for dataflow
jobs. If not set, default to n1-standard-16.'
isOptional: true
parameterType: STRING
dataflow_max_num_workers:
defaultValue: 25.0
description: 'The number of workers to run the
dataflow job. If not set, default to 25.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Custom service account to run
dataflow jobs.'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More
details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: Location for running the transform job.
parameterType: STRING
project:
description: Project to run the transform job.
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
outputDefinitions:
artifacts:
materialized_eval_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized eval split.
materialized_test_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
materialized_train_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The materialized train split.
training_schema_uri:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The training schema.
transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The transform output artifact.
parameters:
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-bool-identity:
executorLabel: exec-bool-identity
inputDefinitions:
parameters:
value:
description: Boolean value to return
parameterType: BOOLEAN
outputDefinitions:
parameters:
Output:
parameterType: STRING
comp-bool-identity-2:
executorLabel: exec-bool-identity-2
inputDefinitions:
parameters:
value:
description: Boolean value to return
parameterType: BOOLEAN
outputDefinitions:
parameters:
Output:
parameterType: STRING
comp-bool-identity-3:
executorLabel: exec-bool-identity-3
inputDefinitions:
parameters:
value:
description: Boolean value to return
parameterType: BOOLEAN
outputDefinitions:
parameters:
Output:
parameterType: STRING
comp-calculate-training-parameters:
executorLabel: exec-calculate-training-parameters
inputDefinitions:
parameters:
fast_testing:
defaultValue: false
description: Internal flag used for presubmit tests.
isOptional: true
parameterType: BOOLEAN
is_skip_architecture_search:
defaultValue: false
description: 'True if the component is being called in the
skip_architecture_search pipeline.'
isOptional: true
parameterType: BOOLEAN
run_distillation:
description: Whether to run distill in the training pipeline.
parameterType: BOOLEAN
stage_1_num_parallel_trials:
description: Number of parallel trials for stage 1.
parameterType: NUMBER_INTEGER
stage_2_num_parallel_trials:
description: Number of parallel trials for stage 2.
parameterType: NUMBER_INTEGER
train_budget_milli_node_hours:
description: 'The train budget for creating this model,
expressed in milli node hours, i.e. a value of 1,000 in this field means
1 node hour.'
parameterType: NUMBER_DOUBLE
outputDefinitions:
parameters:
distill_stage_1_deadline_hours:
parameterType: NUMBER_DOUBLE
reduce_search_space_mode:
parameterType: STRING
stage_1_deadline_hours:
parameterType: NUMBER_DOUBLE
stage_1_num_selected_trials:
parameterType: NUMBER_INTEGER
stage_1_single_run_max_secs:
parameterType: NUMBER_INTEGER
stage_2_deadline_hours:
parameterType: NUMBER_DOUBLE
stage_2_single_run_max_secs:
parameterType: NUMBER_INTEGER
comp-calculate-training-parameters-2:
executorLabel: exec-calculate-training-parameters-2
inputDefinitions:
parameters:
fast_testing:
defaultValue: false
description: Internal flag used for presubmit tests.
isOptional: true
parameterType: BOOLEAN
is_skip_architecture_search:
defaultValue: false
description: 'True if the component is being called in the
skip_architecture_search pipeline.'
isOptional: true
parameterType: BOOLEAN
run_distillation:
description: Whether to run distill in the training pipeline.
parameterType: BOOLEAN
stage_1_num_parallel_trials:
description: Number of parallel trials for stage 1.
parameterType: NUMBER_INTEGER
stage_2_num_parallel_trials:
description: Number of parallel trials for stage 2.
parameterType: NUMBER_INTEGER
train_budget_milli_node_hours:
description: 'The train budget for creating this model,
expressed in milli node hours, i.e. a value of 1,000 in this field means
1 node hour.'
parameterType: NUMBER_DOUBLE
outputDefinitions:
parameters:
distill_stage_1_deadline_hours:
parameterType: NUMBER_DOUBLE
reduce_search_space_mode:
parameterType: STRING
stage_1_deadline_hours:
parameterType: NUMBER_DOUBLE
stage_1_num_selected_trials:
parameterType: NUMBER_INTEGER
stage_1_single_run_max_secs:
parameterType: NUMBER_INTEGER
stage_2_deadline_hours:
parameterType: NUMBER_DOUBLE
stage_2_single_run_max_secs:
parameterType: NUMBER_INTEGER
comp-condition-2:
dag:
outputs:
artifacts:
feature-attribution-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-feature_attributions
producerSubtask: condition-3
model-evaluation-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-evaluation_metrics
producerSubtask: condition-3
tasks:
automl-tabular-cv-trainer:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-cv-trainer
dependentTasks:
- calculate-training-parameters
- importer
inputs:
artifacts:
materialized_cv_splits:
componentInputArtifact: pipelinechannel--merge-materialized-splits-splits
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output
tuning_result_input:
taskOutputArtifact:
outputArtifactKey: artifact
producerTask: importer
parameters:
deadline_hours:
taskOutputParameter:
outputParameterKey: stage_2_deadline_hours
producerTask: calculate-training-parameters
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
num_selected_trials:
componentInputParameter: pipelinechannel--stage_2_num_selected_trials
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
single_run_max_secs:
taskOutputParameter:
outputParameterKey: stage_2_single_run_max_secs
producerTask: calculate-training-parameters
worker_pool_specs_override_json:
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override
taskInfo:
name: automl-tabular-cv-trainer
automl-tabular-ensemble:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-ensemble
dependentTasks:
- automl-tabular-cv-trainer
inputs:
artifacts:
dataset_schema:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema
instance_baseline:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output
tuning_result_input:
taskOutputArtifact:
outputArtifactKey: tuning_result_output
producerTask: automl-tabular-cv-trainer
warmup_data:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: automl-tabular-ensemble
automl-tabular-infra-validator:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-infra-validator
dependentTasks:
- automl-tabular-ensemble
inputs:
artifacts:
unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble
taskInfo:
name: automl-tabular-infra-validator
bool-identity:
cachingOptions:
enableCache: true
componentRef:
name: comp-bool-identity
inputs:
parameters:
value:
componentInputParameter: pipelinechannel--run_evaluation
taskInfo:
name: bool-identity
calculate-training-parameters:
cachingOptions:
enableCache: true
componentRef:
name: comp-calculate-training-parameters
inputs:
parameters:
fast_testing:
componentInputParameter: pipelinechannel--fast_testing
is_skip_architecture_search:
runtimeValue:
constant: 1.0
run_distillation:
componentInputParameter: pipelinechannel--run_distillation
stage_1_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
stage_2_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
train_budget_milli_node_hours:
componentInputParameter: pipelinechannel--train_budget_milli_node_hours
taskInfo:
name: calculate-training-parameters
condition-3:
componentRef:
name: comp-condition-3
dependentTasks:
- automl-tabular-ensemble
- bool-identity
- model-upload
inputs:
artifacts:
pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact:
taskOutputArtifact:
outputArtifactKey: explanation_metadata_artifact
producerTask: automl-tabular-ensemble
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble
pipelinechannel--model-upload-model:
taskOutputArtifact:
outputArtifactKey: model
producerTask: model-upload
parameters:
pipelinechannel--automl-tabular-ensemble-explanation_parameters:
taskOutputParameter:
outputParameterKey: explanation_parameters
producerTask: automl-tabular-ensemble
pipelinechannel--bool-identity-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: bool-identity
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--string-not-empty-Output:
componentInputParameter: pipelinechannel--string-not-empty-Output
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
taskInfo:
name: is-evaluation
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--bool-identity-Output']
== 'true'
importer:
cachingOptions:
enableCache: true
componentRef:
name: comp-importer
inputs:
parameters:
uri:
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri
taskInfo:
name: importer
model-upload:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-upload
dependentTasks:
- automl-tabular-ensemble
inputs:
artifacts:
explanation_metadata_artifact:
taskOutputArtifact:
outputArtifactKey: explanation_metadata_artifact
producerTask: automl-tabular-ensemble
parent_model:
componentInputArtifact: pipelinechannel--parent_model
unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble
parameters:
description:
componentInputParameter: pipelinechannel--model_description
display_name:
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
taskOutputParameter:
outputParameterKey: explanation_parameters
producerTask: automl-tabular-ensemble
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: model-upload
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-transform-transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--merge-materialized-splits-splits:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--parent_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
pipelinechannel--cv_trainer_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--export_additional_model_without_custom_ops:
parameterType: BOOLEAN
pipelinechannel--fast_testing:
parameterType: BOOLEAN
pipelinechannel--location:
parameterType: STRING
pipelinechannel--model_description:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--run_distillation:
parameterType: BOOLEAN
pipelinechannel--run_evaluation:
parameterType: BOOLEAN
pipelinechannel--set-optional-inputs-model_display_name:
parameterType: STRING
pipelinechannel--stage_1_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_1_tuning_result_artifact_uri:
parameterType: STRING
pipelinechannel--stage_2_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_2_num_selected_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
pipelinechannel--train_budget_milli_node_hours:
parameterType: NUMBER_DOUBLE
outputDefinitions:
artifacts:
feature-attribution-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
comp-condition-3:
dag:
outputs:
artifacts:
feature-attribution-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature_attributions
producerSubtask: feature-attribution
model-evaluation-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: evaluation_metrics
producerSubtask: model-evaluation
tasks:
feature-attribution:
cachingOptions:
enableCache: true
componentRef:
name: comp-feature-attribution
dependentTasks:
- model-batch-explanation
inputs:
artifacts:
predictions_gcs_source:
taskOutputArtifact:
outputArtifactKey: gcs_output_directory
producerTask: model-batch-explanation
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: feature-attribution
model-batch-explanation:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-explanation
inputs:
artifacts:
explanation_metadata_artifact:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
generate_explanation:
runtimeValue:
constant: 1.0
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
taskInfo:
name: model-batch-explanation
model-batch-predict:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-predict
inputs:
artifacts:
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
taskInfo:
name: model-batch-predict
model-evaluation:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation
dependentTasks:
- model-batch-predict
inputs:
artifacts:
batch_prediction_job:
taskOutputArtifact:
outputArtifactKey: batchpredictionjob
producerTask: model-batch-predict
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
ground_truth_column:
componentInputParameter: pipelinechannel--target_column
ground_truth_format:
runtimeValue:
constant: jsonl
location:
componentInputParameter: pipelinechannel--location
prediction_label_column:
runtimeValue:
constant: ''
prediction_score_column:
runtimeValue:
constant: ''
predictions_format:
runtimeValue:
constant: jsonl
problem_type:
componentInputParameter: pipelinechannel--prediction_type
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: model-evaluation
model-evaluation-import:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation-import
dependentTasks:
- feature-attribution
- model-evaluation
inputs:
artifacts:
feature_attributions:
taskOutputArtifact:
outputArtifactKey: feature_attributions
producerTask: feature-attribution
metrics:
taskOutputArtifact:
outputArtifactKey: evaluation_metrics
producerTask: model-evaluation
model:
componentInputArtifact: pipelinechannel--model-upload-model
parameters:
dataset_paths:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
dataset_type:
runtimeValue:
constant: tf-record
display_name:
runtimeValue:
constant: AutoML Tabular
problem_type:
componentInputParameter: pipelinechannel--prediction_type
taskInfo:
name: model-evaluation-import
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
pipelinechannel--model-upload-model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
pipelinechannel--automl-tabular-ensemble-explanation_parameters:
parameterType: STRUCT
pipelinechannel--bool-identity-Output:
parameterType: STRING
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--location:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
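  # comp-condition-4: the full search-and-train branch (presumably the path taken
  # when no precomputed stage-1 tuning result is supplied). Its DAG runs the
  # stage-1 architecture search (automl-tabular-stage-1-tuner), stage-2
  # cross-validation training (automl-tabular-cv-trainer-2), ensembling and infra
  # validation, then fans out to condition-5 ("no-distill") or condition-7
  # ("is-distill") based on the run_distillation flag surfaced by bool-identity-3.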
comp-condition-4:
dag:
outputs:
artifacts:
feature-attribution-2-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-2-feature_attributions
producerSubtask: condition-5
feature-attribution-3-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-3-feature_attributions
producerSubtask: condition-7
model-evaluation-2-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-2-evaluation_metrics
producerSubtask: condition-5
model-evaluation-3-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-3-evaluation_metrics
producerSubtask: condition-7
tasks:
automl-tabular-cv-trainer-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-cv-trainer-2
dependentTasks:
- automl-tabular-stage-1-tuner
- calculate-training-parameters-2
inputs:
artifacts:
materialized_cv_splits:
componentInputArtifact: pipelinechannel--merge-materialized-splits-splits
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output
tuning_result_input:
taskOutputArtifact:
outputArtifactKey: tuning_result_output
producerTask: automl-tabular-stage-1-tuner
parameters:
deadline_hours:
taskOutputParameter:
outputParameterKey: stage_2_deadline_hours
producerTask: calculate-training-parameters-2
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
num_selected_trials:
componentInputParameter: pipelinechannel--stage_2_num_selected_trials
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
single_run_max_secs:
taskOutputParameter:
outputParameterKey: stage_2_single_run_max_secs
producerTask: calculate-training-parameters-2
worker_pool_specs_override_json:
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override
taskInfo:
name: automl-tabular-cv-trainer-2
automl-tabular-ensemble-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-ensemble-2
dependentTasks:
- automl-tabular-cv-trainer-2
inputs:
artifacts:
dataset_schema:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema
instance_baseline:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output
tuning_result_input:
taskOutputArtifact:
outputArtifactKey: tuning_result_output
producerTask: automl-tabular-cv-trainer-2
warmup_data:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: automl-tabular-ensemble-2
automl-tabular-infra-validator-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-infra-validator-2
dependentTasks:
- automl-tabular-ensemble-2
inputs:
artifacts:
unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-2
taskInfo:
name: automl-tabular-infra-validator-2
automl-tabular-stage-1-tuner:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-stage-1-tuner
dependentTasks:
- calculate-training-parameters-2
inputs:
artifacts:
materialized_eval_split:
componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split
materialized_train_split:
componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output
parameters:
deadline_hours:
taskOutputParameter:
outputParameterKey: stage_1_deadline_hours
producerTask: calculate-training-parameters-2
disable_early_stopping:
componentInputParameter: pipelinechannel--disable_early_stopping
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
num_selected_trials:
taskOutputParameter:
outputParameterKey: stage_1_num_selected_trials
producerTask: calculate-training-parameters-2
project:
componentInputParameter: pipelinechannel--project
reduce_search_space_mode:
taskOutputParameter:
outputParameterKey: reduce_search_space_mode
producerTask: calculate-training-parameters-2
root_dir:
componentInputParameter: pipelinechannel--root_dir
single_run_max_secs:
taskOutputParameter:
outputParameterKey: stage_1_single_run_max_secs
producerTask: calculate-training-parameters-2
study_spec_parameters_override:
componentInputParameter: pipelinechannel--study_spec_parameters_override
worker_pool_specs_override_json:
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override
taskInfo:
name: automl-tabular-stage-1-tuner
bool-identity-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-bool-identity-2
inputs:
parameters:
value:
componentInputParameter: pipelinechannel--run_evaluation
taskInfo:
name: bool-identity-2
bool-identity-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-bool-identity-3
inputs:
parameters:
value:
componentInputParameter: pipelinechannel--run_distillation
taskInfo:
name: bool-identity-3
calculate-training-parameters-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-calculate-training-parameters-2
inputs:
parameters:
fast_testing:
componentInputParameter: pipelinechannel--fast_testing
is_skip_architecture_search:
runtimeValue:
constant: 0.0
run_distillation:
componentInputParameter: pipelinechannel--run_distillation
stage_1_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
stage_2_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
train_budget_milli_node_hours:
componentInputParameter: pipelinechannel--train_budget_milli_node_hours
taskInfo:
name: calculate-training-parameters-2
condition-5:
componentRef:
name: comp-condition-5
dependentTasks:
- automl-tabular-ensemble-2
- bool-identity-2
- bool-identity-3
inputs:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact:
taskOutputArtifact:
outputArtifactKey: explanation_metadata_artifact
producerTask: automl-tabular-ensemble-2
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-2
pipelinechannel--parent_model:
componentInputArtifact: pipelinechannel--parent_model
parameters:
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters:
taskOutputParameter:
outputParameterKey: explanation_parameters
producerTask: automl-tabular-ensemble-2
pipelinechannel--bool-identity-2-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: bool-identity-2
pipelinechannel--bool-identity-3-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: bool-identity-3
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--model_description:
componentInputParameter: pipelinechannel--model_description
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--set-optional-inputs-model_display_name:
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name
pipelinechannel--string-not-empty-Output:
componentInputParameter: pipelinechannel--string-not-empty-Output
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
taskInfo:
name: no-distill
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output']
== 'false'
condition-7:
componentRef:
name: comp-condition-7
dependentTasks:
- automl-tabular-ensemble-2
- bool-identity-2
- bool-identity-3
- calculate-training-parameters-2
inputs:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-2
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema
pipelinechannel--tabular-stats-and-example-gen-eval_split:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline
pipelinechannel--tabular-stats-and-example-gen-metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
pipelinechannel--tabular-stats-and-example-gen-test_split:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split
pipelinechannel--tabular-stats-and-example-gen-train_split:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split
parameters:
pipelinechannel--bool-identity-2-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: bool-identity-2
pipelinechannel--bool-identity-3-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: bool-identity-3
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours:
taskOutputParameter:
outputParameterKey: distill_stage_1_deadline_hours
producerTask: calculate-training-parameters-2
pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode:
taskOutputParameter:
outputParameterKey: reduce_search_space_mode
producerTask: calculate-training-parameters-2
pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs:
taskOutputParameter:
outputParameterKey: stage_1_single_run_max_secs
producerTask: calculate-training-parameters-2
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--disable_early_stopping:
componentInputParameter: pipelinechannel--disable_early_stopping
pipelinechannel--distill_batch_predict_machine_type:
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type
pipelinechannel--distill_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count
pipelinechannel--distill_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--stage_1_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override
pipelinechannel--string-not-empty-Output:
componentInputParameter: pipelinechannel--string-not-empty-Output
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
pipelinechannel--transform_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb
pipelinechannel--transform_dataflow_machine_type:
componentInputParameter: pipelinechannel--transform_dataflow_machine_type
pipelinechannel--transform_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers
taskInfo:
name: is-distill
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output']
== 'true'
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-transform-materialized_eval_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-transform-materialized_train_split:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-transform-transform_output:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--merge-materialized-splits-splits:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--parent_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-test_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-train_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
pipelinechannel--cv_trainer_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--disable_early_stopping:
parameterType: BOOLEAN
pipelinechannel--distill_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--distill_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--distill_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--export_additional_model_without_custom_ops:
parameterType: BOOLEAN
pipelinechannel--fast_testing:
parameterType: BOOLEAN
pipelinechannel--location:
parameterType: STRING
pipelinechannel--model_description:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--run_distillation:
parameterType: BOOLEAN
pipelinechannel--run_evaluation:
parameterType: BOOLEAN
pipelinechannel--set-optional-inputs-model_display_name:
parameterType: STRING
pipelinechannel--stage_1_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--stage_2_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_2_num_selected_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--study_spec_parameters_override:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
pipelinechannel--train_budget_milli_node_hours:
parameterType: NUMBER_DOUBLE
pipelinechannel--transform_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--transform_dataflow_machine_type:
parameterType: STRING
pipelinechannel--transform_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
feature-attribution-2-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
feature-attribution-3-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-2-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-3-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
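  # comp-condition-5: the "no-distill" path. It uploads the stage-2 ensemble to
  # Vertex AI (model-upload-2) and, when run_evaluation is true (bool-identity-2),
  # runs the evaluation sub-DAG in condition-6.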
comp-condition-5:
dag:
outputs:
artifacts:
feature-attribution-2-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-2-feature_attributions
producerSubtask: condition-6
model-evaluation-2-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-2-evaluation_metrics
producerSubtask: condition-6
tasks:
condition-6:
componentRef:
name: comp-condition-6
dependentTasks:
- model-upload-2
inputs:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
pipelinechannel--model-upload-2-model:
taskOutputArtifact:
outputArtifactKey: model
producerTask: model-upload-2
parameters:
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters:
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters
pipelinechannel--bool-identity-2-Output:
componentInputParameter: pipelinechannel--bool-identity-2-Output
pipelinechannel--bool-identity-3-Output:
componentInputParameter: pipelinechannel--bool-identity-3-Output
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--string-not-empty-Output:
componentInputParameter: pipelinechannel--string-not-empty-Output
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
taskInfo:
name: is-evaluation
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output']
== 'true'
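        # model-upload-2 registers the stage-2 ensemble, together with its
        # explanation metadata and parameters, as a Vertex AI Model (optionally
        # as a new version of parent_model); condition-6 then imports the
        # evaluation results onto this uploaded model.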
model-upload-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-upload-2
inputs:
artifacts:
explanation_metadata_artifact:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact
parent_model:
componentInputArtifact: pipelinechannel--parent_model
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
parameters:
description:
componentInputParameter: pipelinechannel--model_description
display_name:
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: model-upload-2
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
pipelinechannel--parent_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters:
parameterType: STRUCT
pipelinechannel--bool-identity-2-Output:
parameterType: STRING
pipelinechannel--bool-identity-3-Output:
parameterType: STRING
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--location:
parameterType: STRING
pipelinechannel--model_description:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--set-optional-inputs-model_display_name:
parameterType: STRING
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-2-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-2-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
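  # comp-condition-6: the evaluation DAG for the non-distilled model. It runs a
  # batch-explain job and a batch-predict job against the (downsampled) test
  # split, computes feature attributions from the explanation output and
  # evaluation metrics from the prediction output, and imports both onto the
  # model uploaded by model-upload-2 (model-evaluation-import-2).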
comp-condition-6:
dag:
outputs:
artifacts:
feature-attribution-2-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature_attributions
producerSubtask: feature-attribution-2
model-evaluation-2-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: evaluation_metrics
producerSubtask: model-evaluation-2
tasks:
feature-attribution-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-feature-attribution-2
dependentTasks:
- model-batch-explanation-2
inputs:
artifacts:
predictions_gcs_source:
taskOutputArtifact:
outputArtifactKey: gcs_output_directory
producerTask: model-batch-explanation-2
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: feature-attribution-2
model-batch-explanation-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-explanation-2
inputs:
artifacts:
explanation_metadata_artifact:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
generate_explanation:
runtimeValue:
constant: 1.0
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
taskInfo:
name: model-batch-explanation-2
model-batch-predict-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-predict-2
inputs:
artifacts:
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
taskInfo:
name: model-batch-predict-2
model-evaluation-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation-2
dependentTasks:
- model-batch-predict-2
inputs:
artifacts:
batch_prediction_job:
taskOutputArtifact:
outputArtifactKey: batchpredictionjob
producerTask: model-batch-predict-2
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
ground_truth_column:
componentInputParameter: pipelinechannel--target_column
ground_truth_format:
runtimeValue:
constant: jsonl
location:
componentInputParameter: pipelinechannel--location
prediction_label_column:
runtimeValue:
constant: ''
prediction_score_column:
runtimeValue:
constant: ''
predictions_format:
runtimeValue:
constant: jsonl
problem_type:
componentInputParameter: pipelinechannel--prediction_type
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: model-evaluation-2
model-evaluation-import-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation-import-2
dependentTasks:
- feature-attribution-2
- model-evaluation-2
inputs:
artifacts:
feature_attributions:
taskOutputArtifact:
outputArtifactKey: feature_attributions
producerTask: feature-attribution-2
metrics:
taskOutputArtifact:
outputArtifactKey: evaluation_metrics
producerTask: model-evaluation-2
model:
componentInputArtifact: pipelinechannel--model-upload-2-model
parameters:
dataset_paths:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
dataset_type:
runtimeValue:
constant: tf-record
display_name:
runtimeValue:
constant: AutoML Tabular
problem_type:
componentInputParameter: pipelinechannel--prediction_type
taskInfo:
name: model-evaluation-import-2
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
pipelinechannel--model-upload-2-model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters:
parameterType: STRUCT
pipelinechannel--bool-identity-2-Output:
parameterType: STRING
pipelinechannel--bool-identity-3-Output:
parameterType: STRING
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--location:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-2-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-2-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
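  # comp-condition-7: the distillation ("is-distill") path. It batch-predicts the
  # stage-2 ensemble over the train and eval splits to obtain teacher predictions,
  # re-runs the transform over those outputs (automl-tabular-transform-2), trains
  # a single distilled model (automl-tabular-stage-1-tuner-2 with
  # num_selected_trials=1 and run_distillation enabled), ensembles, validates and
  # uploads it, and then evaluates it in condition-8 when run_evaluation is true.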
comp-condition-7:
dag:
outputs:
artifacts:
feature-attribution-3-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-3-feature_attributions
producerSubtask: condition-8
model-evaluation-3-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-3-evaluation_metrics
producerSubtask: condition-8
tasks:
automl-tabular-ensemble-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-ensemble-3
dependentTasks:
- automl-tabular-stage-1-tuner-2
- automl-tabular-transform-2
inputs:
artifacts:
dataset_schema:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema
instance_baseline:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
taskOutputArtifact:
outputArtifactKey: transform_output
producerTask: automl-tabular-transform-2
tuning_result_input:
taskOutputArtifact:
outputArtifactKey: tuning_result_output
producerTask: automl-tabular-stage-1-tuner-2
warmup_data:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: automl-tabular-ensemble-3
automl-tabular-infra-validator-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-infra-validator-3
dependentTasks:
- automl-tabular-ensemble-3
inputs:
artifacts:
unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-3
taskInfo:
name: automl-tabular-infra-validator-3
automl-tabular-stage-1-tuner-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-stage-1-tuner-2
dependentTasks:
- automl-tabular-transform-2
inputs:
artifacts:
materialized_eval_split:
taskOutputArtifact:
outputArtifactKey: materialized_eval_split
producerTask: automl-tabular-transform-2
materialized_train_split:
taskOutputArtifact:
outputArtifactKey: materialized_train_split
producerTask: automl-tabular-transform-2
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
transform_output:
taskOutputArtifact:
outputArtifactKey: transform_output
producerTask: automl-tabular-transform-2
parameters:
deadline_hours:
componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours
disable_early_stopping:
componentInputParameter: pipelinechannel--disable_early_stopping
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
num_selected_trials:
runtimeValue:
constant: 1.0
project:
componentInputParameter: pipelinechannel--project
reduce_search_space_mode:
componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode
root_dir:
componentInputParameter: pipelinechannel--root_dir
run_distillation:
runtimeValue:
constant: 1.0
single_run_max_secs:
componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs
worker_pool_specs_override_json:
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override
taskInfo:
name: automl-tabular-stage-1-tuner-2
automl-tabular-transform-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-transform-2
dependentTasks:
- write-bp-result-path
- write-bp-result-path-2
inputs:
artifacts:
dataset_schema:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema
eval_split:
taskOutputArtifact:
outputArtifactKey: result
producerTask: write-bp-result-path-2
metadata:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata
test_split:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split
train_split:
taskOutputArtifact:
outputArtifactKey: result
producerTask: write-bp-result-path
parameters:
dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--transform_dataflow_machine_type
dataflow_max_num_workers:
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: automl-tabular-transform-2
condition-8:
componentRef:
name: comp-condition-8
dependentTasks:
- automl-tabular-ensemble-3
- model-upload-3
inputs:
artifacts:
pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact:
taskOutputArtifact:
outputArtifactKey: explanation_metadata_artifact
producerTask: automl-tabular-ensemble-3
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-3
pipelinechannel--model-upload-3-model:
taskOutputArtifact:
outputArtifactKey: model
producerTask: model-upload-3
parameters:
pipelinechannel--automl-tabular-ensemble-3-explanation_parameters:
taskOutputParameter:
outputParameterKey: explanation_parameters
producerTask: automl-tabular-ensemble-3
pipelinechannel--bool-identity-2-Output:
componentInputParameter: pipelinechannel--bool-identity-2-Output
pipelinechannel--bool-identity-3-Output:
componentInputParameter: pipelinechannel--bool-identity-3-Output
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--string-not-empty-Output:
componentInputParameter: pipelinechannel--string-not-empty-Output
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
taskInfo:
name: is-evaluation
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output']
== 'true'
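        # model-batch-predict-3 / -4 run the stage-2 ensemble (the teacher) over
        # the train and eval split URIs resolved by read-input-uri(-2); their
        # tf-record outputs are passed through write-bp-result-path(-2) into
        # automl-tabular-transform-2 as the distillation training data.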
model-batch-predict-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-predict-3
dependentTasks:
- read-input-uri
inputs:
artifacts:
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
taskOutputParameter:
outputParameterKey: Output
producerTask: read-input-uri
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-predict-train-split
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count
predictions_format:
runtimeValue:
constant: tf-record
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count
taskInfo:
name: model-batch-predict-3
model-batch-predict-4:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-predict-4
dependentTasks:
- read-input-uri-2
inputs:
artifacts:
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
taskOutputParameter:
outputParameterKey: Output
producerTask: read-input-uri-2
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-predict-eval-split
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count
predictions_format:
runtimeValue:
constant: tf-record
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count
taskInfo:
name: model-batch-predict-4
model-upload-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-upload-3
dependentTasks:
- automl-tabular-ensemble-3
- automl-tabular-infra-validator-3
inputs:
artifacts:
explanation_metadata_artifact:
taskOutputArtifact:
outputArtifactKey: explanation_metadata_artifact
producerTask: automl-tabular-ensemble-3
unmanaged_container_model:
taskOutputArtifact:
outputArtifactKey: unmanaged_container_model
producerTask: automl-tabular-ensemble-3
parameters:
display_name:
runtimeValue:
constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
taskOutputParameter:
outputParameterKey: explanation_parameters
producerTask: automl-tabular-ensemble-3
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: model-upload-3
read-input-uri:
cachingOptions:
enableCache: true
componentRef:
name: comp-read-input-uri
inputs:
artifacts:
split_uri:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split
taskInfo:
name: read-input-uri
read-input-uri-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-read-input-uri-2
inputs:
artifacts:
split_uri:
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split
taskInfo:
name: read-input-uri-2
write-bp-result-path:
cachingOptions:
enableCache: true
componentRef:
name: comp-write-bp-result-path
dependentTasks:
- model-batch-predict-3
inputs:
artifacts:
bp_job:
taskOutputArtifact:
outputArtifactKey: batchpredictionjob
producerTask: model-batch-predict-3
taskInfo:
name: write-bp-result-path
write-bp-result-path-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-write-bp-result-path-2
dependentTasks:
- model-batch-predict-4
inputs:
artifacts:
bp_job:
taskOutputArtifact:
outputArtifactKey: batchpredictionjob
producerTask: model-batch-predict-4
taskInfo:
name: write-bp-result-path-2
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-test_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
pipelinechannel--tabular-stats-and-example-gen-train_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
pipelinechannel--bool-identity-2-Output:
parameterType: STRING
pipelinechannel--bool-identity-3-Output:
parameterType: STRING
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours:
parameterType: NUMBER_DOUBLE
pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode:
parameterType: STRING
pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs:
parameterType: NUMBER_INTEGER
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--disable_early_stopping:
parameterType: BOOLEAN
pipelinechannel--distill_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--distill_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--distill_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--export_additional_model_without_custom_ops:
parameterType: BOOLEAN
pipelinechannel--location:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--stage_1_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
pipelinechannel--transform_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--transform_dataflow_machine_type:
parameterType: STRING
pipelinechannel--transform_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
feature-attribution-3-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-3-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
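  # comp-condition-8: the evaluation DAG for the distilled model, mirroring
  # condition-6: batch-explain and batch-predict against the test split, then
  # feature-attribution-3 and model-evaluation-3, whose results are associated
  # with the model uploaded by model-upload-3 (its model artifact is passed into
  # this DAG).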
comp-condition-8:
dag:
outputs:
artifacts:
feature-attribution-3-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature_attributions
producerSubtask: feature-attribution-3
model-evaluation-3-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: evaluation_metrics
producerSubtask: model-evaluation-3
tasks:
feature-attribution-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-feature-attribution-3
dependentTasks:
- model-batch-explanation-3
inputs:
artifacts:
predictions_gcs_source:
taskOutputArtifact:
outputArtifactKey: gcs_output_directory
producerTask: model-batch-explanation-3
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
taskInfo:
name: feature-attribution-3
model-batch-explanation-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-explanation-3
inputs:
artifacts:
explanation_metadata_artifact:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
explanation_parameters:
componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json
generate_explanation:
runtimeValue:
constant: 1.0
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
taskInfo:
name: model-batch-explanation-3
model-batch-predict-5:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-batch-predict-5
inputs:
artifacts:
unmanaged_container_model:
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model
parameters:
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
gcs_destination_output_uri_prefix:
componentInputParameter: pipelinechannel--root_dir
gcs_source_uris:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
instances_format:
runtimeValue:
constant: tf-record
job_display_name:
runtimeValue:
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
location:
componentInputParameter: pipelinechannel--location
machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
predictions_format:
runtimeValue:
constant: jsonl
project:
componentInputParameter: pipelinechannel--project
starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
taskInfo:
name: model-batch-predict-5
model-evaluation-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation-3
dependentTasks:
- model-batch-predict-5
inputs:
artifacts:
batch_prediction_job:
taskOutputArtifact:
outputArtifactKey: batchpredictionjob
producerTask: model-batch-predict-5
parameters:
dataflow_disk_size:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
dataflow_max_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
dataflow_workers_num:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
ground_truth_column:
componentInputParameter: pipelinechannel--target_column
ground_truth_format:
runtimeValue:
constant: jsonl
location:
componentInputParameter: pipelinechannel--location
prediction_label_column:
runtimeValue:
constant: ''
prediction_score_column:
runtimeValue:
constant: ''
predictions_format:
runtimeValue:
constant: jsonl
problem_type:
componentInputParameter: pipelinechannel--prediction_type
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: model-evaluation-3
model-evaluation-import-3:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-evaluation-import-3
dependentTasks:
- feature-attribution-3
- model-evaluation-3
inputs:
artifacts:
feature_attributions:
taskOutputArtifact:
outputArtifactKey: feature_attributions
producerTask: feature-attribution-3
metrics:
taskOutputArtifact:
outputArtifactKey: evaluation_metrics
producerTask: model-evaluation-3
model:
componentInputArtifact: pipelinechannel--model-upload-3-model
parameters:
dataset_paths:
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json
dataset_type:
runtimeValue:
constant: tf-record
display_name:
runtimeValue:
constant: AutoML Tabular
problem_type:
componentInputParameter: pipelinechannel--prediction_type
taskInfo:
name: model-evaluation-import-3
inputDefinitions:
artifacts:
pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
pipelinechannel--model-upload-3-model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
pipelinechannel--automl-tabular-ensemble-3-explanation_parameters:
parameterType: STRUCT
pipelinechannel--bool-identity-2-Output:
parameterType: STRING
pipelinechannel--bool-identity-3-Output:
parameterType: STRING
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--location:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--string-not-empty-Output:
parameterType: STRING
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
parameterType: LIST
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-3-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-3-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
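  # Illustrative reading of the sub-DAG above (comment only; a hypothetical summary, not compiler
  # output): within comp-condition-8,
  #   model-batch-explanation-3 -> feature-attribution-3
  #   model-batch-predict-5     -> model-evaluation-3
  #   (feature-attribution-3 + model-evaluation-3) -> model-evaluation-import-3
  # and the resulting attributions and metrics are surfaced through the outputDefinitions above
  # for the model supplied as pipelinechannel--model-upload-3-model.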
comp-exit-handler-1:
dag:
outputs:
artifacts:
feature-attribution-2-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-2-feature_attributions
producerSubtask: condition-4
feature-attribution-3-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-3-feature_attributions
producerSubtask: condition-4
feature-attribution-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-feature_attributions
producerSubtask: condition-2
model-evaluation-2-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-2-evaluation_metrics
producerSubtask: condition-4
model-evaluation-3-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-3-evaluation_metrics
producerSubtask: condition-4
model-evaluation-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-evaluation_metrics
producerSubtask: condition-2
tasks:
automl-tabular-transform:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-transform
dependentTasks:
- tabular-stats-and-example-gen
inputs:
artifacts:
dataset_schema:
taskOutputArtifact:
outputArtifactKey: dataset_schema
producerTask: tabular-stats-and-example-gen
eval_split:
taskOutputArtifact:
outputArtifactKey: eval_split
producerTask: tabular-stats-and-example-gen
metadata:
taskOutputArtifact:
outputArtifactKey: metadata
producerTask: tabular-stats-and-example-gen
test_split:
taskOutputArtifact:
outputArtifactKey: test_split
producerTask: tabular-stats-and-example-gen
train_split:
taskOutputArtifact:
outputArtifactKey: train_split
producerTask: tabular-stats-and-example-gen
parameters:
dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--transform_dataflow_machine_type
dataflow_max_num_workers:
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
project:
componentInputParameter: pipelinechannel--project
root_dir:
componentInputParameter: pipelinechannel--root_dir
taskInfo:
name: automl-tabular-transform
condition-2:
componentRef:
name: comp-condition-2
dependentTasks:
- automl-tabular-transform
- merge-materialized-splits
- string-not-empty
- tabular-stats-and-example-gen
inputs:
artifacts:
pipelinechannel--automl-tabular-transform-transform_output:
taskOutputArtifact:
outputArtifactKey: transform_output
producerTask: automl-tabular-transform
pipelinechannel--merge-materialized-splits-splits:
taskOutputArtifact:
outputArtifactKey: splits
producerTask: merge-materialized-splits
pipelinechannel--parent_model:
componentInputArtifact: pipelinechannel--parent_model
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
taskOutputArtifact:
outputArtifactKey: dataset_schema
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-eval_split:
taskOutputArtifact:
outputArtifactKey: eval_split
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
taskOutputArtifact:
outputArtifactKey: instance_baseline
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-metadata:
taskOutputArtifact:
outputArtifactKey: metadata
producerTask: tabular-stats-and-example-gen
parameters:
pipelinechannel--cv_trainer_worker_pool_specs_override:
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
pipelinechannel--fast_testing:
componentInputParameter: pipelinechannel--fast_testing
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--model_description:
componentInputParameter: pipelinechannel--model_description
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--run_distillation:
componentInputParameter: pipelinechannel--run_distillation
pipelinechannel--run_evaluation:
componentInputParameter: pipelinechannel--run_evaluation
pipelinechannel--set-optional-inputs-model_display_name:
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name
pipelinechannel--stage_1_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
pipelinechannel--stage_1_tuning_result_artifact_uri:
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri
pipelinechannel--stage_2_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
pipelinechannel--stage_2_num_selected_trials:
componentInputParameter: pipelinechannel--stage_2_num_selected_trials
pipelinechannel--string-not-empty-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: string-not-empty
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
taskOutputParameter:
outputParameterKey: downsampled_test_split_json
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
taskOutputParameter:
outputParameterKey: test_split_json
producerTask: tabular-stats-and-example-gen
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
pipelinechannel--train_budget_milli_node_hours:
componentInputParameter: pipelinechannel--train_budget_milli_node_hours
taskInfo:
name: stage_1_tuning_result_artifact_uri_not_empty
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output']
== 'true'
condition-4:
componentRef:
name: comp-condition-4
dependentTasks:
- automl-tabular-transform
- merge-materialized-splits
- string-not-empty
- tabular-stats-and-example-gen
inputs:
artifacts:
pipelinechannel--automl-tabular-transform-materialized_eval_split:
taskOutputArtifact:
outputArtifactKey: materialized_eval_split
producerTask: automl-tabular-transform
pipelinechannel--automl-tabular-transform-materialized_train_split:
taskOutputArtifact:
outputArtifactKey: materialized_train_split
producerTask: automl-tabular-transform
pipelinechannel--automl-tabular-transform-transform_output:
taskOutputArtifact:
outputArtifactKey: transform_output
producerTask: automl-tabular-transform
pipelinechannel--merge-materialized-splits-splits:
taskOutputArtifact:
outputArtifactKey: splits
producerTask: merge-materialized-splits
pipelinechannel--parent_model:
componentInputArtifact: pipelinechannel--parent_model
pipelinechannel--tabular-stats-and-example-gen-dataset_schema:
taskOutputArtifact:
outputArtifactKey: dataset_schema
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-eval_split:
taskOutputArtifact:
outputArtifactKey: eval_split
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-instance_baseline:
taskOutputArtifact:
outputArtifactKey: instance_baseline
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-metadata:
taskOutputArtifact:
outputArtifactKey: metadata
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-test_split:
taskOutputArtifact:
outputArtifactKey: test_split
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-train_split:
taskOutputArtifact:
outputArtifactKey: train_split
producerTask: tabular-stats-and-example-gen
parameters:
pipelinechannel--cv_trainer_worker_pool_specs_override:
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override
pipelinechannel--dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
pipelinechannel--disable_early_stopping:
componentInputParameter: pipelinechannel--disable_early_stopping
pipelinechannel--distill_batch_predict_machine_type:
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type
pipelinechannel--distill_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count
pipelinechannel--distill_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count
pipelinechannel--encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers
pipelinechannel--export_additional_model_without_custom_ops:
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops
pipelinechannel--fast_testing:
componentInputParameter: pipelinechannel--fast_testing
pipelinechannel--location:
componentInputParameter: pipelinechannel--location
pipelinechannel--model_description:
componentInputParameter: pipelinechannel--model_description
pipelinechannel--prediction_type:
componentInputParameter: pipelinechannel--prediction_type
pipelinechannel--project:
componentInputParameter: pipelinechannel--project
pipelinechannel--root_dir:
componentInputParameter: pipelinechannel--root_dir
pipelinechannel--run_distillation:
componentInputParameter: pipelinechannel--run_distillation
pipelinechannel--run_evaluation:
componentInputParameter: pipelinechannel--run_evaluation
pipelinechannel--set-optional-inputs-model_display_name:
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name
pipelinechannel--stage_1_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override
pipelinechannel--stage_2_num_parallel_trials:
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials
pipelinechannel--stage_2_num_selected_trials:
componentInputParameter: pipelinechannel--stage_2_num_selected_trials
pipelinechannel--string-not-empty-Output:
taskOutputParameter:
outputParameterKey: Output
producerTask: string-not-empty
pipelinechannel--study_spec_parameters_override:
componentInputParameter: pipelinechannel--study_spec_parameters_override
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json:
taskOutputParameter:
outputParameterKey: downsampled_test_split_json
producerTask: tabular-stats-and-example-gen
pipelinechannel--tabular-stats-and-example-gen-test_split_json:
taskOutputParameter:
outputParameterKey: test_split_json
producerTask: tabular-stats-and-example-gen
pipelinechannel--target_column:
componentInputParameter: pipelinechannel--target_column
pipelinechannel--train_budget_milli_node_hours:
componentInputParameter: pipelinechannel--train_budget_milli_node_hours
pipelinechannel--transform_dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb
pipelinechannel--transform_dataflow_machine_type:
componentInputParameter: pipelinechannel--transform_dataflow_machine_type
pipelinechannel--transform_dataflow_max_num_workers:
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers
taskInfo:
name: stage_1_tuning_result_artifact_uri_empty
triggerPolicy:
condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output']
== 'false'
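        # Illustrative example (comment only): condition-2 and condition-4 are gated by the same
        # string-not-empty output with opposite trigger conditions, so exactly one branch runs.
        # For instance, submitting the pipeline with a non-empty value such as
        #   stage_1_tuning_result_artifact_uri: gs://my-bucket/stage1_tuning_result  # hypothetical URI
        # makes string-not-empty emit 'true' and selects stage_1_tuning_result_artifact_uri_not_empty;
        # the default empty string selects stage_1_tuning_result_artifact_uri_empty instead.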
merge-materialized-splits:
cachingOptions:
enableCache: true
componentRef:
name: comp-merge-materialized-splits
dependentTasks:
- automl-tabular-transform
inputs:
artifacts:
split_0:
taskOutputArtifact:
outputArtifactKey: materialized_train_split
producerTask: automl-tabular-transform
split_1:
taskOutputArtifact:
outputArtifactKey: materialized_eval_split
producerTask: automl-tabular-transform
taskInfo:
name: merge-materialized-splits
string-not-empty:
cachingOptions:
enableCache: true
componentRef:
name: comp-string-not-empty
inputs:
parameters:
value:
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri
taskInfo:
name: string-not-empty
tabular-stats-and-example-gen:
cachingOptions:
enableCache: true
componentRef:
name: comp-tabular-stats-and-example-gen
inputs:
parameters:
additional_experiments_json:
componentInputParameter: pipelinechannel--additional_experiments
data_source_bigquery_table_path:
componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path
data_source_csv_filenames:
componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames
dataflow_disk_size_gb:
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb
dataflow_machine_type:
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type
dataflow_max_num_workers:
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers
dataflow_service_account:
componentInputParameter: pipelinechannel--dataflow_service_account
dataflow_subnetwork:
componentInputParameter: pipelinechannel--dataflow_subnetwork
dataflow_use_public_ips:
componentInputParameter: pipelinechannel--dataflow_use_public_ips
enable_probabilistic_inference:
componentInputParameter: pipelinechannel--enable_probabilistic_inference
encryption_spec_key_name:
componentInputParameter: pipelinechannel--encryption_spec_key_name
location:
componentInputParameter: pipelinechannel--location
optimization_objective:
componentInputParameter: pipelinechannel--optimization_objective
optimization_objective_precision_value:
componentInputParameter: pipelinechannel--optimization_objective_precision_value
optimization_objective_recall_value:
componentInputParameter: pipelinechannel--optimization_objective_recall_value
predefined_split_key:
componentInputParameter: pipelinechannel--predefined_split_key
prediction_type:
componentInputParameter: pipelinechannel--prediction_type
project:
componentInputParameter: pipelinechannel--project
quantiles:
componentInputParameter: pipelinechannel--quantiles
root_dir:
componentInputParameter: pipelinechannel--root_dir
run_distillation:
componentInputParameter: pipelinechannel--run_distillation
stratified_split_key:
componentInputParameter: pipelinechannel--stratified_split_key
target_column_name:
componentInputParameter: pipelinechannel--target_column
test_fraction:
componentInputParameter: pipelinechannel--test_fraction
timestamp_split_key:
componentInputParameter: pipelinechannel--timestamp_split_key
training_fraction:
componentInputParameter: pipelinechannel--training_fraction
transformations:
runtimeValue:
constant: '[]'
transformations_path:
componentInputParameter: pipelinechannel--transformations
validation_fraction:
componentInputParameter: pipelinechannel--validation_fraction
weight_column_name:
componentInputParameter: pipelinechannel--weight_column
taskInfo:
name: tabular-stats-and-example-gen
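        # Illustrative note (comment only): the inline `transformations` input above is pinned to
        # the literal '[]', while the pipeline-level `transformations` string is forwarded as
        # `transformations_path`. The *_fraction inputs are forwarded unchanged; a manual split
        # could be requested at submission time with hypothetical values such as
        #   training_fraction: 0.8
        #   validation_fraction: 0.1
        #   test_fraction: 0.1
        # whereas leaving them at -1.0 defers to the default split behavior.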
inputDefinitions:
artifacts:
pipelinechannel--parent_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
pipelinechannel--additional_experiments:
parameterType: STRUCT
pipelinechannel--cv_trainer_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--dataflow_service_account:
parameterType: STRING
pipelinechannel--dataflow_subnetwork:
parameterType: STRING
pipelinechannel--dataflow_use_public_ips:
parameterType: BOOLEAN
pipelinechannel--disable_early_stopping:
parameterType: BOOLEAN
pipelinechannel--distill_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--distill_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--distill_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--enable_probabilistic_inference:
parameterType: BOOLEAN
pipelinechannel--encryption_spec_key_name:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_explain_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_explain_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_machine_type:
parameterType: STRING
pipelinechannel--evaluation_batch_predict_max_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_batch_predict_starting_replica_count:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_machine_type:
parameterType: STRING
pipelinechannel--evaluation_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--evaluation_dataflow_starting_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--export_additional_model_without_custom_ops:
parameterType: BOOLEAN
pipelinechannel--fast_testing:
parameterType: BOOLEAN
pipelinechannel--location:
parameterType: STRING
pipelinechannel--model_description:
parameterType: STRING
pipelinechannel--optimization_objective:
parameterType: STRING
pipelinechannel--optimization_objective_precision_value:
parameterType: NUMBER_DOUBLE
pipelinechannel--optimization_objective_recall_value:
parameterType: NUMBER_DOUBLE
pipelinechannel--predefined_split_key:
parameterType: STRING
pipelinechannel--prediction_type:
parameterType: STRING
pipelinechannel--project:
parameterType: STRING
pipelinechannel--quantiles:
parameterType: LIST
pipelinechannel--root_dir:
parameterType: STRING
pipelinechannel--run_distillation:
parameterType: BOOLEAN
pipelinechannel--run_evaluation:
parameterType: BOOLEAN
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path:
parameterType: STRING
pipelinechannel--set-optional-inputs-data_source_csv_filenames:
parameterType: STRING
pipelinechannel--set-optional-inputs-model_display_name:
parameterType: STRING
pipelinechannel--stage_1_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
parameterType: LIST
pipelinechannel--stage_1_tuning_result_artifact_uri:
parameterType: STRING
pipelinechannel--stage_2_num_parallel_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stage_2_num_selected_trials:
parameterType: NUMBER_INTEGER
pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--stats_and_example_gen_dataflow_machine_type:
parameterType: STRING
pipelinechannel--stats_and_example_gen_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--stratified_split_key:
parameterType: STRING
pipelinechannel--study_spec_parameters_override:
parameterType: LIST
pipelinechannel--target_column:
parameterType: STRING
pipelinechannel--test_fraction:
parameterType: NUMBER_DOUBLE
pipelinechannel--timestamp_split_key:
parameterType: STRING
pipelinechannel--train_budget_milli_node_hours:
parameterType: NUMBER_DOUBLE
pipelinechannel--training_fraction:
parameterType: NUMBER_DOUBLE
pipelinechannel--transform_dataflow_disk_size_gb:
parameterType: NUMBER_INTEGER
pipelinechannel--transform_dataflow_machine_type:
parameterType: STRING
pipelinechannel--transform_dataflow_max_num_workers:
parameterType: NUMBER_INTEGER
pipelinechannel--transformations:
parameterType: STRING
pipelinechannel--validation_fraction:
parameterType: NUMBER_DOUBLE
pipelinechannel--weight_column:
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-2-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
feature-attribution-3-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
feature-attribution-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-2-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-3-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
comp-feature-attribution:
executorLabel: exec-feature-attribution
inputDefinitions:
artifacts:
predictions_bigquery_source:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'BigQuery table
with prediction or explanation data to be used for this evaluation. For
prediction results, the table column should be named "predicted_*".'
isOptional: true
predictions_gcs_source:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'An artifact with its
URI pointing toward a GCS directory with prediction or explanation files
to be used for this evaluation. For prediction results, the files should
be named "prediction.results-*" or "predictions_". For explanation
results, the files should be named "explanation.results-*".'
isOptional: true
parameters:
dataflow_disk_size:
defaultValue: 50.0
description: 'The disk size (in GB) of the machine
executing the evaluation run. If not set, defaulted to `50`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
description: 'The machine type executing the
evaluation run. If not set, defaulted to `n1-standard-4`.'
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 5.0
description: 'The max number of workers
            executing the evaluation run. If not set, defaulted to `5`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Service account to run the
dataflow job. If not set, dataflow will use the default worker service
account. For more details, see
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More details:
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 1.0
description: 'The number of workers executing the
            evaluation run. If not set, defaulted to `1`.'
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption key
for the Dataflow job. If this is set, then all resources created by the
Dataflow job will be encrypted with the provided encryption key.'
isOptional: true
parameterType: STRING
force_direct_runner:
defaultValue: false
description: 'Flag to use Beam DirectRunner. If set to true,
use Apache Beam DirectRunner to execute the task locally instead of
launching a Dataflow job.'
isOptional: true
parameterType: BOOLEAN
location:
defaultValue: us-central1
description: 'Location running feature attribution. If not
set, defaulted to `us-central1`.'
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
description: 'The file format for the batch
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.'
isOptional: true
parameterType: STRING
project:
description: Project to run feature attribution container.
parameterType: STRING
outputDefinitions:
artifacts:
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the dataflow
job. For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
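  # Illustrative example (comment only, hypothetical values): callers of this component typically
  # override only the Dataflow sizing knobs and let the remaining inputs fall back to the
  # defaults documented above, e.g.
  #   dataflow_machine_type: n1-standard-4
  #   dataflow_workers_num: 10
  #   dataflow_max_workers_num: 25
  #   dataflow_disk_size: 50
  #   force_direct_runner: false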
comp-feature-attribution-2:
executorLabel: exec-feature-attribution-2
inputDefinitions:
artifacts:
predictions_bigquery_source:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'BigQuery table
with prediction or explanation data to be used for this evaluation. For
prediction results, the table column should be named "predicted_*".'
isOptional: true
predictions_gcs_source:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'An artifact with its
URI pointing toward a GCS directory with prediction or explanation files
to be used for this evaluation. For prediction results, the files should
be named "prediction.results-*" or "predictions_". For explanation
results, the files should be named "explanation.results-*".'
isOptional: true
parameters:
dataflow_disk_size:
defaultValue: 50.0
description: 'The disk size (in GB) of the machine
executing the evaluation run. If not set, defaulted to `50`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
description: 'The machine type executing the
evaluation run. If not set, defaulted to `n1-standard-4`.'
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 5.0
description: 'The max number of workers
            executing the evaluation run. If not set, defaulted to `5`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Service account to run the
dataflow job. If not set, dataflow will use the default worker service
account. For more details, see
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More details:
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 1.0
description: 'The number of workers executing the
            evaluation run. If not set, defaulted to `1`.'
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption key
for the Dataflow job. If this is set, then all resources created by the
Dataflow job will be encrypted with the provided encryption key.'
isOptional: true
parameterType: STRING
force_direct_runner:
defaultValue: false
description: 'Flag to use Beam DirectRunner. If set to true,
use Apache Beam DirectRunner to execute the task locally instead of
launching a Dataflow job.'
isOptional: true
parameterType: BOOLEAN
location:
defaultValue: us-central1
description: 'Location running feature attribution. If not
set, defaulted to `us-central1`.'
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
description: 'The file format for the batch
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.'
isOptional: true
parameterType: STRING
project:
description: Project to run feature attribution container.
parameterType: STRING
outputDefinitions:
artifacts:
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the dataflow
job. For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-feature-attribution-3:
executorLabel: exec-feature-attribution-3
inputDefinitions:
artifacts:
predictions_bigquery_source:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'BigQuery table
with prediction or explanation data to be used for this evaluation. For
prediction results, the table column should be named "predicted_*".'
isOptional: true
predictions_gcs_source:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'An artifact with its
URI pointing toward a GCS directory with prediction or explanation files
to be used for this evaluation. For prediction results, the files should
be named "prediction.results-*" or "predictions_". For explanation
results, the files should be named "explanation.results-*".'
isOptional: true
parameters:
dataflow_disk_size:
defaultValue: 50.0
description: 'The disk size (in GB) of the machine
executing the evaluation run. If not set, defaulted to `50`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
description: 'The machine type executing the
evaluation run. If not set, defaulted to `n1-standard-4`.'
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 5.0
description: 'The max number of workers
            executing the evaluation run. If not set, defaulted to `5`.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Service account to run the
dataflow job. If not set, dataflow will use the default worker service
account. For more details, see
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More details:
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 1.0
description: 'The number of workers executing the
            evaluation run. If not set, defaulted to `1`.'
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption key
for the Dataflow job. If this is set, then all resources created by the
Dataflow job will be encrypted with the provided encryption key.'
isOptional: true
parameterType: STRING
force_direct_runner:
defaultValue: false
description: 'Flag to use Beam DirectRunner. If set to true,
use Apache Beam DirectRunner to execute the task locally instead of
launching a Dataflow job.'
isOptional: true
parameterType: BOOLEAN
location:
defaultValue: us-central1
description: 'Location running feature attribution. If not
set, defaulted to `us-central1`.'
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
description: 'The file format for the batch
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.'
isOptional: true
parameterType: STRING
project:
description: Project to run feature attribution container.
parameterType: STRING
outputDefinitions:
artifacts:
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the dataflow
job. For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-importer:
executorLabel: exec-importer
inputDefinitions:
parameters:
uri:
parameterType: STRING
outputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
comp-merge-materialized-splits:
executorLabel: exec-merge-materialized-splits
inputDefinitions:
artifacts:
split_0:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The first materialized split.
split_1:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The second materialized split.
outputDefinitions:
artifacts:
splits:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
comp-model-batch-explanation:
executorLabel: exec-model-batch-explanation
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
instances_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
job_display_name:
parameterType: STRING
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
project:
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
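  # Illustrative example (comment only, hypothetical values): in this component only
  # job_display_name and project lack a defaultValue, so a minimal call supplies those plus the
  # model artifacts and an instance source, roughly:
  #   job_display_name: batch-explain-example
  #   project: my-gcp-project
  #   location: us-central1
  #   gcs_source_uris: ['gs://my-bucket/test_split-*.tfrecord']
  #   instances_format: tf-record
  #   generate_explanation: true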
comp-model-batch-explanation-2:
executorLabel: exec-model-batch-explanation-2
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
instances_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
job_display_name:
parameterType: STRING
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
project:
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-batch-explanation-3:
executorLabel: exec-model-batch-explanation-3
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
instances_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
job_display_name:
parameterType: STRING
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
project:
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-batch-predict:
executorLabel: exec-model-batch-predict
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'The Model used to get predictions via this job. Must share
the same
ancestor Location. Starting this job has no impact on any existing
deployments of the Model and their resources. Either this or
unmanaged_container_model must be specified.'
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'The unmanaged container model used to get predictions via
this job.
This should be used for models that are not uploaded to Vertex. Either
this or model must be specified.'
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
description: 'The number of accelerators to attach
to the `machine_type`. Only used if `machine_type` is set. For more
details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
description: 'The type of accelerator(s) that may be
attached to the machine as per `accelerator_count`. Only used if
`machine_type` is set. For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
description: 'The BigQuery project location where the output is to be written
to. In
the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>``, where <model-display-name> is made
BigQuery-dataset-name compatible (for example, most special characters
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
"based on ISO-8601" format. In the dataset two tables will be created,
``predictions``, and ``errors``. If the Model has both ``instance``
and ``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which the
prediction succeeded, it has columns as per a concatenation of the
Model''s instance and prediction schemata. The ``errors`` table
contains rows for which the prediction has failed, it has instance
columns, as per the instance schema, followed by a single "errors"
            column, which as values has ``google.rpc.Status``
represented as a STRUCT, and containing only ``code`` and
``message``. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
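        # Illustrative example (comment only): applying the naming rule described above, a model
        # displayed as "my model" with a job created at 2024-03-28T18:59:00.000Z would land in a
        # new dataset named roughly prediction_my_model_2024_03_28T18_59_00_000Z, holding the
        # `predictions` and `errors` tables (hypothetical names derived from that rule).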
bigquery_source_input_uri:
defaultValue: ''
description: 'BigQuery URI to a table, up to 2000 characters long. For example:
`projectId.bqDatasetId.bqTableId` For more details about this input
config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.'
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption
key options for a BatchPredictionJob. If this is set, then all
resources created by the BatchPredictionJob will be encrypted with the
provided encryption key. Has the form:
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute resource
is created.'
isOptional: true
parameterType: STRING
excluded_fields:
defaultValue: []
description: 'Fields that will be excluded in the prediction instance that
is
sent to the Model.
Excluded will be attached to the batch prediction output if
[key_field][] is not specified.
When excluded_fields is populated, [included_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
            or TfRecord.'
isOptional: true
parameterType: LIST
explanation_metadata:
defaultValue: {}
description: 'Explanation metadata
configuration for this BatchPredictionJob. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_metadata`. All fields of
`explanation_metadata` are optional in the request. If a field of the
`explanation_metadata` object is not populated, the corresponding
field of the `Model.explanation_metadata` object is inherited. For
more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
description: 'Parameters to configure
explaining for Model''s predictions. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_parameters`. All fields of
`explanation_parameters` are optional in the request. If a field of
the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is
inherited. For more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.'
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
description: 'The Google Cloud
Storage location of the directory where the output is to be written
to. In the given directory a new directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where timestamp
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files
``predictions_0001.<extension>``, ``predictions_0002.<extension>``,
..., ``predictions_N.<extension>`` are created where ``<extension>``
depends on chosen ``predictions_format``, and N may equal 0001 and
depends on the total number of successfully predicted instances. If
the Model has both ``instance`` and ``prediction`` schemata defined
then each such file contains predictions as per the
``predictions_format``. If prediction for any instance failed
(partially or completely), then an additional
``errors_0001.<extension>``, ``errors_0002.<extension>``,...,
``errors_N.<extension>`` files are created (N depends on total number
of failed predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field which as
value has ``google.rpc.Status`` containing only ``code`` and
``message`` fields. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
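        # Illustrative output layout with hypothetical bucket and timestamp, assuming
        # predictions_format "jsonl" and following the naming scheme described above:
        #   gs://my-bucket/batch-out/prediction-my-model-2024-03-28T18:59:00.123Z/
        #     predictions_0001.jsonl
        #     errors_0001.jsonl   (written only if some instances failed)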
gcs_source_uris:
defaultValue: []
description: "Google Cloud Storage URI(-s) to your instances to run batch\
\ prediction\non. They must match `instances_format`. May contain wildcards.\
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: LIST
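        # Hypothetical example: JSONL shards selected with a wildcard, matching an
        # instances_format of "jsonl":
        #   gcs_source_uris:
        #   - gs://my-bucket/batch-in/instances-*.jsonl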
generate_explanation:
defaultValue: false
description: 'Generate explanation along with
the batch prediction results. This will cause the batch prediction
output to include explanations based on the `prediction_format`: -
`bigquery`: output includes a column named `explanation`. The value is
a struct that conforms to the [aiplatform.gapic.Explanation] object. -
`jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object. - `csv`:
Generating explanations for CSV format is not supported. If this
field is set to true, either the Model.explanation_spec or
explanation_metadata and explanation_parameters must be populated.'
isOptional: true
parameterType: BOOLEAN
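        # Sketch of a single jsonl output line when generate_explanation is True (values
        # are hypothetical; the `explanation` value conforms to the Explanation object
        # referenced above):
        #   {"instance": {...}, "prediction": {...}, "explanation": {"attributions": [...]}}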
included_fields:
defaultValue: []
description: 'Fields that will be included in the prediction instance that
is
sent to the Model.
If [instance_type][] is `array`, the order of field names in
included_fields also determines the order of the values in the array.
When included_fields is populated, [excluded_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
or TfRecord.'
isOptional: true
parameterType: LIST
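        # Hypothetical example: send only two feature columns to the Model; when
        # instance_type is `array`, this list also fixes the value order (included_fields
        # and excluded_fields are mutually exclusive, as noted above):
        #   included_fields:
        #   - age
        #   - income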
instance_type:
defaultValue: ''
description: "The format of the instance that the Model accepts. Vertex\
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\
to the specified format.\nSupported values are:\n** `object`: Each input\
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\
\ each row is converted to an array. The order\n of columns is determined\
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\
\ [included_fields][] must be populated for specifying field orders.\n\
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\
\ must be populated for specifying field orders.\n* Does not apply to\
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\
\ and `csv`, the behavior is the same as `array`. The\n order of columns\
\ is the same as defined in the file or table, unless\n [included_fields][]\
\ is populated.\n * For `jsonl`, the prediction instance format is determined\
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
\ each record will be converted to\n an object in the format of `{\"\
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\
\ the content of the record.\n * For `file-list`, each file in the list\
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\
\ where `<value>` is\n the Base64-encoded string of the content of the\
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
\ Base64 is not for this field. --)"
isOptional: true
parameterType: STRING
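        # Sketch of the conversion described above for a hypothetical BigQuery row with
        # columns age=42 and income=50000:
        #   instance_type `object`  ->  {"age": 42, "income": 50000}
        #   instance_type `array`   ->  [42, 50000]  (order per column order or included_fields)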
instances_format:
defaultValue: jsonl
description: "The format in which instances are\ngiven, must be one of the\
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: STRING
job_display_name:
description: The user-defined name of this BatchPredictionJob.
parameterType: STRING
key_field:
defaultValue: ''
description: "The name of the field that is considered as a key.\nThe values\
            \ identified by the key field are not included in the\ntransformed instances\
            \ that are sent to the Model. This is similar to\nspecifying the name\
            \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
\ output will not include the instances. Instead the\noutput will only\
\ include the value of the key field, in a field named\n`key` in the output:\n\
\ * For `jsonl` output format, the output will have a `key` field\n \
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
            \ the output will have a `key`\n column instead of the instance\
\ feature columns.\nThe input must be JSONL with objects at each line,\
\ CSV, BigQuery\nor TfRecord."
isOptional: true
parameterType: STRING
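        # Hypothetical example: with key_field `user_id` and jsonl output, each output
        # line carries the key instead of the full instance, e.g.
        #   {"key": "user-123", "prediction": {...}}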
labels:
defaultValue: {}
description: 'The labels with user-defined metadata to
organize your BatchPredictionJobs. Label keys and values can be no
longer than 64 characters (Unicode codepoints), can only contain
lowercase letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf for
more information and examples of labels.'
isOptional: true
parameterType: STRUCT
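        # Hypothetical labels struct (keys and values must satisfy the constraints above):
        #   labels:
        #     team: fraud-detection
        #     env: staging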
location:
defaultValue: us-central1
description: 'Location for creating the BatchPredictionJob.
If not set, default to us-central1.'
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
description: 'The type of machine for running batch
prediction on dedicated resources. If the Model supports
DEDICATED_RESOURCES this config may be provided (and the job will use
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
this config must be provided. For more details about the
BatchDedicatedResources, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
description: 'The number of
the records (e.g. instances) of the operation given in each batch to a
machine replica. Machine type, and size of a single record should be
considered when setting this parameter, higher value speeds up the
batch operation''s execution, but too high value will result in a whole
batch not fitting in a machine''s memory, and the whole operation will
fail. The default value is 4.'
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
description: 'The maximum number of machine replicas the batch operation
may be scaled
to. Only used if `machine_type` is set. Default is 10.'
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
          description: The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's `parameters_schema_uri`.
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
description: "The format in which Vertex AI gives the predictions. Must\
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig."
isOptional: true
parameterType: STRING
project:
description: Project to create the BatchPredictionJob.
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
description: 'The number of machine replicas
used at the start of the batch operation. If not set, Vertex AI
decides starting number, not greater than `max_replica_count`. Only
used if `machine_type` is set.'
isOptional: true
parameterType: NUMBER_INTEGER
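        # Hypothetical dedicated-resources configuration combining the parameters above;
        # machine_type must be set for the replica counts and batch size to take effect:
        #   machine_type: n1-standard-16
        #   starting_replica_count: 2
        #   max_replica_count: 10
        #   manual_batch_tuning_parameters_batch_size: 64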
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
instead.**] Artifact
representation of the created batch prediction job.'
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
bigquery_output_table is specified.'
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
gcs_destination_output_uri_prefix is specified.'
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the batch prediction
job.
For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-model-batch-predict-2:
executorLabel: exec-model-batch-predict-2
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'The Model used to get predictions via this job. Must share
the same
ancestor Location. Starting this job has no impact on any existing
deployments of the Model and their resources. Either this or
unmanaged_container_model must be specified.'
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'The unmanaged container model used to get predictions via
this job.
This should be used for models that are not uploaded to Vertex. Either
this or model must be specified.'
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
description: 'The number of accelerators to attach
to the `machine_type`. Only used if `machine_type` is set. For more
details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
description: 'The type of accelerator(s) that may be
attached to the machine as per `accelerator_count`. Only used if
`machine_type` is set. For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
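        # Hypothetical accelerator configuration (only used when machine_type is also
        # set; supported types are listed in the MachineSpec reference above):
        #   accelerator_type: NVIDIA_TESLA_T4
        #   accelerator_count: 1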
bigquery_destination_output_uri:
defaultValue: ''
description: 'The BigQuery project location where the output is to be written
to. In
the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made
BigQuery-dataset-name compatible (for example, most special characters
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
"based on ISO-8601" format. In the dataset two tables will be created,
``predictions``, and ``errors``. If the Model has both ``instance``
and ``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which the
prediction succeeded, it has columns as per a concatenation of the
Model''s instance and prediction schemata. The ``errors`` table
contains rows for which the prediction has failed, it has instance
columns, as per the instance schema, followed by a single "errors"
column, which as values has ```google.rpc.Status`` <Status>`__
represented as a STRUCT, and containing only ``code`` and
``message``. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
description: 'BigQuery URI to a table, up to 2000 characters long. For example:
`projectId.bqDatasetId.bqTableId` For more details about this input
config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.'
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption
key options for a BatchPredictionJob. If this is set, then all
resources created by the BatchPredictionJob will be encrypted with the
provided encryption key. Has the form:
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute resource
is created.'
isOptional: true
parameterType: STRING
excluded_fields:
defaultValue: []
description: 'Fields that will be excluded in the prediction instance that
is
sent to the Model.
Excluded will be attached to the batch prediction output if
[key_field][] is not specified.
When excluded_fields is populated, [included_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
            or TfRecord.'
isOptional: true
parameterType: LIST
explanation_metadata:
defaultValue: {}
description: 'Explanation metadata
configuration for this BatchPredictionJob. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_metadata`. All fields of
`explanation_metadata` are optional in the request. If a field of the
`explanation_metadata` object is not populated, the corresponding
field of the `Model.explanation_metadata` object is inherited. For
more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
description: 'Parameters to configure
explaining for Model''s predictions. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_parameters`. All fields of
`explanation_parameters` are optional in the request. If a field of
the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is
inherited. For more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.'
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
description: 'The Google Cloud
Storage location of the directory where the output is to be written
to. In the given directory a new directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where timestamp
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files
``predictions_0001.<extension>``, ``predictions_0002.<extension>``,
..., ``predictions_N.<extension>`` are created where ``<extension>``
depends on chosen ``predictions_format``, and N may equal 0001 and
depends on the total number of successfully predicted instances. If
the Model has both ``instance`` and ``prediction`` schemata defined
then each such file contains predictions as per the
``predictions_format``. If prediction for any instance failed
(partially or completely), then an additional
``errors_0001.<extension>``, ``errors_0002.<extension>``,...,
``errors_N.<extension>`` files are created (N depends on total number
of failed predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field which as
value has ``google.rpc.Status`` containing only ``code`` and
``message`` fields. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
description: "Google Cloud Storage URI(-s) to your instances to run batch\
\ prediction\non. They must match `instances_format`. May contain wildcards.\
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
description: 'Generate explanation along with
the batch prediction results. This will cause the batch prediction
output to include explanations based on the `prediction_format`: -
`bigquery`: output includes a column named `explanation`. The value is
a struct that conforms to the [aiplatform.gapic.Explanation] object. -
`jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object. - `csv`:
Generating explanations for CSV format is not supported. If this
field is set to true, either the Model.explanation_spec or
explanation_metadata and explanation_parameters must be populated.'
isOptional: true
parameterType: BOOLEAN
included_fields:
defaultValue: []
description: 'Fields that will be included in the prediction instance that
is
sent to the Model.
If [instance_type][] is `array`, the order of field names in
included_fields also determines the order of the values in the array.
When included_fields is populated, [excluded_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
or TfRecord.'
isOptional: true
parameterType: LIST
instance_type:
defaultValue: ''
description: "The format of the instance that the Model accepts. Vertex\
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\
to the specified format.\nSupported values are:\n** `object`: Each input\
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\
\ each row is converted to an array. The order\n of columns is determined\
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\
\ [included_fields][] must be populated for specifying field orders.\n\
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\
\ must be populated for specifying field orders.\n* Does not apply to\
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\
\ and `csv`, the behavior is the same as `array`. The\n order of columns\
\ is the same as defined in the file or table, unless\n [included_fields][]\
\ is populated.\n * For `jsonl`, the prediction instance format is determined\
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
\ each record will be converted to\n an object in the format of `{\"\
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\
\ the content of the record.\n * For `file-list`, each file in the list\
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\
\ where `<value>` is\n the Base64-encoded string of the content of the\
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
\ Base64 is not for this field. --)"
isOptional: true
parameterType: STRING
instances_format:
defaultValue: jsonl
description: "The format in which instances are\ngiven, must be one of the\
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: STRING
job_display_name:
description: The user-defined name of this BatchPredictionJob.
parameterType: STRING
key_field:
defaultValue: ''
description: "The name of the field that is considered as a key.\nThe values\
            \ identified by the key field are not included in the\ntransformed instances\
            \ that are sent to the Model. This is similar to\nspecifying the name\
            \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
\ output will not include the instances. Instead the\noutput will only\
\ include the value of the key field, in a field named\n`key` in the output:\n\
\ * For `jsonl` output format, the output will have a `key` field\n \
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
            \ the output will have a `key`\n column instead of the instance\
\ feature columns.\nThe input must be JSONL with objects at each line,\
\ CSV, BigQuery\nor TfRecord."
isOptional: true
parameterType: STRING
labels:
defaultValue: {}
description: 'The labels with user-defined metadata to
organize your BatchPredictionJobs. Label keys and values can be no
longer than 64 characters (Unicode codepoints), can only contain
lowercase letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf for
more information and examples of labels.'
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
description: 'Location for creating the BatchPredictionJob.
If not set, default to us-central1.'
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
description: 'The type of machine for running batch
prediction on dedicated resources. If the Model supports
DEDICATED_RESOURCES this config may be provided (and the job will use
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
this config must be provided. For more details about the
BatchDedicatedResources, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
description: 'The number of
the records (e.g. instances) of the operation given in each batch to a
machine replica. Machine type, and size of a single record should be
considered when setting this parameter, higher value speeds up the
batch operation''s execution, but too high value will result in a whole
batch not fitting in a machine''s memory, and the whole operation will
fail. The default value is 4.'
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
description: 'The maximum number of machine replicas the batch operation
may be scaled
to. Only used if `machine_type` is set. Default is 10.'
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
          description: The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's `parameters_schema_uri`.
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
description: "The format in which Vertex AI gives the predictions. Must\
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig."
isOptional: true
parameterType: STRING
project:
description: Project to create the BatchPredictionJob.
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
description: 'The number of machine replicas
used at the start of the batch operation. If not set, Vertex AI
decides starting number, not greater than `max_replica_count`. Only
used if `machine_type` is set.'
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
instead.**] Artifact
representation of the created batch prediction job.'
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
bigquery_output_table is specified.'
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
gcs_destination_output_uri_prefix is specified.'
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the batch prediction
job.
For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-model-batch-predict-3:
executorLabel: exec-model-batch-predict-3
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'The Model used to get predictions via this job. Must share
the same
ancestor Location. Starting this job has no impact on any existing
deployments of the Model and their resources. Either this or
unmanaged_container_model must be specified.'
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'The unmanaged container model used to get predictions via
this job.
This should be used for models that are not uploaded to Vertex. Either
this or model must be specified.'
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
description: 'The number of accelerators to attach
to the `machine_type`. Only used if `machine_type` is set. For more
details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
description: 'The type of accelerator(s) that may be
attached to the machine as per `accelerator_count`. Only used if
`machine_type` is set. For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
description: 'The BigQuery project location where the output is to be written
to. In
the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made
BigQuery-dataset-name compatible (for example, most special characters
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
"based on ISO-8601" format. In the dataset two tables will be created,
``predictions``, and ``errors``. If the Model has both ``instance``
and ``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which the
prediction succeeded, it has columns as per a concatenation of the
Model''s instance and prediction schemata. The ``errors`` table
contains rows for which the prediction has failed, it has instance
columns, as per the instance schema, followed by a single "errors"
column, which as values has ```google.rpc.Status`` <Status>`__
represented as a STRUCT, and containing only ``code`` and
``message``. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
description: 'BigQuery URI to a table, up to 2000 characters long. For example:
`projectId.bqDatasetId.bqTableId` For more details about this input
config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.'
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption
key options for a BatchPredictionJob. If this is set, then all
resources created by the BatchPredictionJob will be encrypted with the
provided encryption key. Has the form:
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute resource
is created.'
isOptional: true
parameterType: STRING
excluded_fields:
defaultValue: []
description: 'Fields that will be excluded in the prediction instance that
is
sent to the Model.
Excluded will be attached to the batch prediction output if
[key_field][] is not specified.
When excluded_fields is populated, [included_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
            or TfRecord.'
isOptional: true
parameterType: LIST
explanation_metadata:
defaultValue: {}
description: 'Explanation metadata
configuration for this BatchPredictionJob. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_metadata`. All fields of
`explanation_metadata` are optional in the request. If a field of the
`explanation_metadata` object is not populated, the corresponding
field of the `Model.explanation_metadata` object is inherited. For
more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
description: 'Parameters to configure
explaining for Model''s predictions. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_parameters`. All fields of
`explanation_parameters` are optional in the request. If a field of
the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is
inherited. For more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.'
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
description: 'The Google Cloud
Storage location of the directory where the output is to be written
to. In the given directory a new directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where timestamp
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files
``predictions_0001.<extension>``, ``predictions_0002.<extension>``,
..., ``predictions_N.<extension>`` are created where ``<extension>``
depends on chosen ``predictions_format``, and N may equal 0001 and
depends on the total number of successfully predicted instances. If
the Model has both ``instance`` and ``prediction`` schemata defined
then each such file contains predictions as per the
``predictions_format``. If prediction for any instance failed
(partially or completely), then an additional
``errors_0001.<extension>``, ``errors_0002.<extension>``,...,
``errors_N.<extension>`` files are created (N depends on total number
of failed predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field which as
value has ``google.rpc.Status`` containing only ``code`` and
``message`` fields. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
description: "Google Cloud Storage URI(-s) to your instances to run batch\
\ prediction\non. They must match `instances_format`. May contain wildcards.\
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
description: 'Generate explanation along with
the batch prediction results. This will cause the batch prediction
output to include explanations based on the `prediction_format`: -
`bigquery`: output includes a column named `explanation`. The value is
a struct that conforms to the [aiplatform.gapic.Explanation] object. -
`jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object. - `csv`:
Generating explanations for CSV format is not supported. If this
field is set to true, either the Model.explanation_spec or
explanation_metadata and explanation_parameters must be populated.'
isOptional: true
parameterType: BOOLEAN
included_fields:
defaultValue: []
description: 'Fields that will be included in the prediction instance that
is
sent to the Model.
If [instance_type][] is `array`, the order of field names in
included_fields also determines the order of the values in the array.
When included_fields is populated, [excluded_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
or TfRecord.'
isOptional: true
parameterType: LIST
instance_type:
defaultValue: ''
description: "The format of the instance that the Model accepts. Vertex\
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\
to the specified format.\nSupported values are:\n** `object`: Each input\
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\
\ each row is converted to an array. The order\n of columns is determined\
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\
\ [included_fields][] must be populated for specifying field orders.\n\
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\
\ must be populated for specifying field orders.\n* Does not apply to\
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\
\ and `csv`, the behavior is the same as `array`. The\n order of columns\
\ is the same as defined in the file or table, unless\n [included_fields][]\
\ is populated.\n * For `jsonl`, the prediction instance format is determined\
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
\ each record will be converted to\n an object in the format of `{\"\
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\
\ the content of the record.\n * For `file-list`, each file in the list\
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\
\ where `<value>` is\n the Base64-encoded string of the content of the\
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
\ Base64 is not for this field. --)"
isOptional: true
parameterType: STRING
instances_format:
defaultValue: jsonl
description: "The format in which instances are\ngiven, must be one of the\
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: STRING
job_display_name:
description: The user-defined name of this BatchPredictionJob.
parameterType: STRING
key_field:
defaultValue: ''
description: "The name of the field that is considered as a key.\nThe values\
            \ identified by the key field are not included in the\ntransformed instances\
            \ that are sent to the Model. This is similar to\nspecifying the name\
            \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
\ output will not include the instances. Instead the\noutput will only\
\ include the value of the key field, in a field named\n`key` in the output:\n\
\ * For `jsonl` output format, the output will have a `key` field\n \
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
            \ the output will have a `key`\n column instead of the instance\
\ feature columns.\nThe input must be JSONL with objects at each line,\
\ CSV, BigQuery\nor TfRecord."
isOptional: true
parameterType: STRING
labels:
defaultValue: {}
description: 'The labels with user-defined metadata to
organize your BatchPredictionJobs. Label keys and values can be no
longer than 64 characters (Unicode codepoints), can only contain
lowercase letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf for
more information and examples of labels.'
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
description: 'Location for creating the BatchPredictionJob.
If not set, default to us-central1.'
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
description: 'The type of machine for running batch
prediction on dedicated resources. If the Model supports
DEDICATED_RESOURCES this config may be provided (and the job will use
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
this config must be provided. For more details about the
BatchDedicatedResources, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
description: 'The number of
the records (e.g. instances) of the operation given in each batch to a
machine replica. Machine type, and size of a single record should be
considered when setting this parameter, higher value speeds up the
batch operation''s execution, but too high value will result in a whole
batch not fitting in a machine''s memory, and the whole operation will
fail. The default value is 4.'
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
description: 'The maximum number of machine replicas the batch operation
may be scaled
to. Only used if `machine_type` is set. Default is 10.'
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
          description: The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's `parameters_schema_uri`.
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
description: "The format in which Vertex AI gives the predictions. Must\
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig."
isOptional: true
parameterType: STRING
project:
description: Project to create the BatchPredictionJob.
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
description: 'The number of machine replicas
used at the start of the batch operation. If not set, Vertex AI
decides starting number, not greater than `max_replica_count`. Only
used if `machine_type` is set.'
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
instead.**] Artifact
representation of the created batch prediction job.'
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
bigquery_output_table is specified.'
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
gcs_destination_output_uri_prefix is specified.'
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the batch prediction
job.
For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-model-batch-predict-4:
executorLabel: exec-model-batch-predict-4
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'The Model used to get predictions via this job. Must share
the same
ancestor Location. Starting this job has no impact on any existing
deployments of the Model and their resources. Either this or
unmanaged_container_model must be specified.'
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'The unmanaged container model used to get predictions via
this job.
This should be used for models that are not uploaded to Vertex. Either
this or model must be specified.'
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
description: 'The number of accelerators to attach
to the `machine_type`. Only used if `machine_type` is set. For more
details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
description: 'The type of accelerator(s) that may be
attached to the machine as per `accelerator_count`. Only used if
`machine_type` is set. For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
description: 'The BigQuery project location where the output is to be written
to. In
the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made
BigQuery-dataset-name compatible (for example, most special characters
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
"based on ISO-8601" format. In the dataset two tables will be created,
``predictions``, and ``errors``. If the Model has both ``instance``
and ``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which the
prediction succeeded, it has columns as per a concatenation of the
Model''s instance and prediction schemata. The ``errors`` table
contains rows for which the prediction has failed, it has instance
columns, as per the instance schema, followed by a single "errors"
column, which as values has ```google.rpc.Status`` <Status>`__
represented as a STRUCT, and containing only ``code`` and
``message``. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
description: 'BigQuery URI to a table, up to 2000 characters long. For example:
`projectId.bqDatasetId.bqTableId` For more details about this input
config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.'
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption
key options for a BatchPredictionJob. If this is set, then all
resources created by the BatchPredictionJob will be encrypted with the
provided encryption key. Has the form:
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute resource
is created.'
isOptional: true
parameterType: STRING
excluded_fields:
defaultValue: []
description: 'Fields that will be excluded in the prediction instance that
is
sent to the Model.
Excluded will be attached to the batch prediction output if
[key_field][] is not specified.
When excluded_fields is populated, [included_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
            or TfRecord.'
isOptional: true
parameterType: LIST
explanation_metadata:
defaultValue: {}
description: 'Explanation metadata
configuration for this BatchPredictionJob. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_metadata`. All fields of
`explanation_metadata` are optional in the request. If a field of the
`explanation_metadata` object is not populated, the corresponding
field of the `Model.explanation_metadata` object is inherited. For
more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
description: 'Parameters to configure
explaining for Model''s predictions. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_parameters`. All fields of
`explanation_parameters` are optional in the request. If a field of
the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is
inherited. For more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.'
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
description: 'The Google Cloud
Storage location of the directory where the output is to be written
to. In the given directory a new directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where timestamp
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files
``predictions_0001.<extension>``, ``predictions_0002.<extension>``,
..., ``predictions_N.<extension>`` are created where ``<extension>``
depends on chosen ``predictions_format``, and N may equal 0001 and
depends on the total number of successfully predicted instances. If
the Model has both ``instance`` and ``prediction`` schemata defined
then each such file contains predictions as per the
``predictions_format``. If prediction for any instance failed
(partially or completely), then an additional
``errors_0001.<extension>``, ``errors_0002.<extension>``,...,
``errors_N.<extension>`` files are created (N depends on total number
of failed predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field which as
value has ``google.rpc.Status`` containing only ``code`` and
``message`` fields. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
description: "Google Cloud Storage URI(-s) to your instances to run batch\
\ prediction\non. They must match `instances_format`. May contain wildcards.\
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
description: 'Generate explanation along with
the batch prediction results. This will cause the batch prediction
output to include explanations based on the `prediction_format`: -
`bigquery`: output includes a column named `explanation`. The value is
a struct that conforms to the [aiplatform.gapic.Explanation] object. -
`jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object. - `csv`:
Generating explanations for CSV format is not supported. If this
field is set to true, either the Model.explanation_spec or
explanation_metadata and explanation_parameters must be populated.'
isOptional: true
parameterType: BOOLEAN
included_fields:
defaultValue: []
description: 'Fields that will be included in the prediction instance that
is
sent to the Model.
If [instance_type][] is `array`, the order of field names in
included_fields also determines the order of the values in the array.
When included_fields is populated, [excluded_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
or TfRecord.'
isOptional: true
parameterType: LIST
instance_type:
defaultValue: ''
description: "The format of the instance that the Model accepts. Vertex\
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\
to the specified format.\nSupported values are:\n** `object`: Each input\
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\
\ each row is converted to an array. The order\n of columns is determined\
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\
\ [included_fields][] must be populated for specifying field orders.\n\
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\
\ must be populated for specifying field orders.\n* Does not apply to\
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\
\ and `csv`, the behavior is the same as `array`. The\n order of columns\
\ is the same as defined in the file or table, unless\n [included_fields][]\
\ is populated.\n * For `jsonl`, the prediction instance format is determined\
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
\ each record will be converted to\n an object in the format of `{\"\
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\
\ the content of the record.\n * For `file-list`, each file in the list\
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\
\ where `<value>` is\n the Base64-encoded string of the content of the\
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
\ Base64 is not for this field. --)"
isOptional: true
parameterType: STRING
instances_format:
defaultValue: jsonl
description: "The format in which instances are\ngiven, must be one of the\
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: STRING
job_display_name:
description: The user-defined name of this BatchPredictionJob.
parameterType: STRING
key_field:
defaultValue: ''
description: "The name of the field that is considered as a key.\nThe values\
            \ identified by the key field are not included in the\ntransformed instances\
            \ that are sent to the Model. This is similar to\nspecifying the name\
            \ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
\ output will not include the instances. Instead the\noutput will only\
\ include the value of the key field, in a field named\n`key` in the output:\n\
\ * For `jsonl` output format, the output will have a `key` field\n \
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
            \ the output will have a `key`\n column instead of the instance\
\ feature columns.\nThe input must be JSONL with objects at each line,\
\ CSV, BigQuery\nor TfRecord."
isOptional: true
parameterType: STRING
labels:
defaultValue: {}
description: 'The labels with user-defined metadata to
organize your BatchPredictionJobs. Label keys and values can be no
longer than 64 characters (Unicode codepoints), can only contain
lowercase letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf for
more information and examples of labels.'
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
description: 'Location for creating the BatchPredictionJob.
If not set, default to us-central1.'
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
description: 'The type of machine for running batch
prediction on dedicated resources. If the Model supports
DEDICATED_RESOURCES this config may be provided (and the job will use
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
this config must be provided. For more details about the
BatchDedicatedResources, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
description: 'The number of
the records (e.g. instances) of the operation given in each batch to a
machine replica. Machine type, and size of a single record should be
considered when setting this parameter, higher value speeds up the
batch operation''s execution, but too high value will result in a whole
batch not fitting in a machine''s memory, and the whole operation will
fail. The default value is 4.'
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
description: 'The maximum number of machine replicas the batch operation
may be scaled
to. Only used if `machine_type` is set. Default is 10.'
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
          description: The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's `parameters_schema_uri`.
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
description: "The format in which Vertex AI gives the predictions. Must\
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig."
isOptional: true
parameterType: STRING
project:
description: Project to create the BatchPredictionJob.
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
description: 'The number of machine replicas
used at the start of the batch operation. If not set, Vertex AI
decides starting number, not greater than `max_replica_count`. Only
used if `machine_type` is set.'
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
instead.**] Artifact
representation of the created batch prediction job.'
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
bigquery_output_table is specified.'
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
gcs_destination_output_uri_prefix is specified.'
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the batch prediction
job.
For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
comp-model-batch-predict-5:
executorLabel: exec-model-batch-predict-5
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'The Model used to get predictions via this job. Must share
the same
ancestor Location. Starting this job has no impact on any existing
deployments of the Model and their resources. Either this or
unmanaged_container_model must be specified.'
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
description: 'The unmanaged container model used to get predictions via
this job.
This should be used for models that are not uploaded to Vertex. Either
this or model must be specified.'
isOptional: true
parameters:
accelerator_count:
defaultValue: 0.0
description: 'The number of accelerators to attach
to the `machine_type`. Only used if `machine_type` is set. For more
details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: NUMBER_INTEGER
accelerator_type:
defaultValue: ''
description: 'The type of accelerator(s) that may be
attached to the machine as per `accelerator_count`. Only used if
`machine_type` is set. For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
bigquery_destination_output_uri:
defaultValue: ''
description: 'The BigQuery project location where the output is to be written
to. In
the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made
BigQuery-dataset-name compatible (for example, most special characters
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
"based on ISO-8601" format. In the dataset two tables will be created,
``predictions``, and ``errors``. If the Model has both ``instance``
and ``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which the
prediction succeeded, it has columns as per a concatenation of the
Model''s instance and prediction schemata. The ``errors`` table
contains rows for which the prediction has failed, it has instance
columns, as per the instance schema, followed by a single "errors"
            column, which as values has ``google.rpc.Status``
represented as a STRUCT, and containing only ``code`` and
``message``. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
bigquery_source_input_uri:
defaultValue: ''
description: 'BigQuery URI to a table, up to 2000 characters long. For example:
`projectId.bqDatasetId.bqTableId` For more details about this input
config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.'
isOptional: true
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
description: 'Customer-managed encryption
key options for a BatchPredictionJob. If this is set, then all
resources created by the BatchPredictionJob will be encrypted with the
provided encryption key. Has the form:
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute resource
is created.'
isOptional: true
parameterType: STRING
excluded_fields:
defaultValue: []
description: 'Fields that will be excluded in the prediction instance that
is
sent to the Model.
Excluded will be attached to the batch prediction output if
[key_field][] is not specified.
When excluded_fields is populated, [included_fields][] must be empty.
            The input must be JSONL with objects at each line, CSV, BigQuery
            or TfRecord.'
isOptional: true
parameterType: LIST
explanation_metadata:
defaultValue: {}
description: 'Explanation metadata
configuration for this BatchPredictionJob. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_metadata`. All fields of
`explanation_metadata` are optional in the request. If a field of the
`explanation_metadata` object is not populated, the corresponding
field of the `Model.explanation_metadata` object is inherited. For
more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.'
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
description: 'Parameters to configure
explaining for Model''s predictions. Can be specified only if
`generate_explanation` is set to `True`. This value overrides the
value of `Model.explanation_parameters`. All fields of
`explanation_parameters` are optional in the request. If a field of
the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is
inherited. For more details, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.'
isOptional: true
parameterType: STRUCT
gcs_destination_output_uri_prefix:
defaultValue: ''
description: 'The Google Cloud
Storage location of the directory where the output is to be written
to. In the given directory a new directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where timestamp
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files
``predictions_0001.<extension>``, ``predictions_0002.<extension>``,
..., ``predictions_N.<extension>`` are created where ``<extension>``
depends on chosen ``predictions_format``, and N may equal 0001 and
depends on the total number of successfully predicted instances. If
the Model has both ``instance`` and ``prediction`` schemata defined
then each such file contains predictions as per the
``predictions_format``. If prediction for any instance failed
(partially or completely), then an additional
``errors_0001.<extension>``, ``errors_0002.<extension>``,...,
``errors_N.<extension>`` files are created (N depends on total number
of failed predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field which as
value has ``google.rpc.Status`` containing only ``code`` and
``message`` fields. For more details about this output config, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.'
isOptional: true
parameterType: STRING
gcs_source_uris:
defaultValue: []
description: "Google Cloud Storage URI(-s) to your instances to run batch\
\ prediction\non. They must match `instances_format`. May contain wildcards.\
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: LIST
generate_explanation:
defaultValue: false
description: 'Generate explanation along with
the batch prediction results. This will cause the batch prediction
output to include explanations based on the `prediction_format`: -
`bigquery`: output includes a column named `explanation`. The value is
a struct that conforms to the [aiplatform.gapic.Explanation] object. -
`jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object. - `csv`:
Generating explanations for CSV format is not supported. If this
field is set to true, either the Model.explanation_spec or
explanation_metadata and explanation_parameters must be populated.'
isOptional: true
parameterType: BOOLEAN
included_fields:
defaultValue: []
description: 'Fields that will be included in the prediction instance that
is
sent to the Model.
If [instance_type][] is `array`, the order of field names in
included_fields also determines the order of the values in the array.
When included_fields is populated, [excluded_fields][] must be empty.
The input must be JSONL with objects at each line, CSV, BigQuery
or TfRecord.'
isOptional: true
parameterType: LIST
instance_type:
defaultValue: ''
description: "The format of the instance that the Model accepts. Vertex\
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\
to the specified format.\nSupported values are:\n** `object`: Each input\
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\
\ each row is converted to an array. The order\n of columns is determined\
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\
\ [included_fields][] must be populated for specifying field orders.\n\
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\
\ must be populated for specifying field orders.\n* Does not apply to\
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\
\ and `csv`, the behavior is the same as `array`. The\n order of columns\
\ is the same as defined in the file or table, unless\n [included_fields][]\
\ is populated.\n * For `jsonl`, the prediction instance format is determined\
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\
\ each record will be converted to\n an object in the format of `{\"\
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\
\ the content of the record.\n * For `file-list`, each file in the list\
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\
\ where `<value>` is\n the Base64-encoded string of the content of the\
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\
\ Base64 is not for this field. --)"
isOptional: true
parameterType: STRING
instances_format:
defaultValue: jsonl
description: "The format in which instances are\ngiven, must be one of the\
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig."
isOptional: true
parameterType: STRING
job_display_name:
description: The user-defined name of this BatchPredictionJob.
parameterType: STRING
key_field:
defaultValue: ''
description: "The name of the field that is considered as a key.\nThe values\
            \ identified by the key field are not included in the\ntransformed instances\
            \ that are sent to the Model. This is similar to\nspecifying this name\
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\
\ output will not include the instances. Instead the\noutput will only\
\ include the value of the key field, in a field named\n`key` in the output:\n\
\ * For `jsonl` output format, the output will have a `key` field\n \
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\
            \ the output will have a `key`\n column instead of the instance\
\ feature columns.\nThe input must be JSONL with objects at each line,\
\ CSV, BigQuery\nor TfRecord."
isOptional: true
parameterType: STRING
labels:
defaultValue: {}
description: 'The labels with user-defined metadata to
organize your BatchPredictionJobs. Label keys and values can be no
longer than 64 characters (Unicode codepoints), can only contain
lowercase letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf for
more information and examples of labels.'
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
description: 'Location for creating the BatchPredictionJob.
If not set, default to us-central1.'
isOptional: true
parameterType: STRING
machine_type:
defaultValue: ''
description: 'The type of machine for running batch
prediction on dedicated resources. If the Model supports
DEDICATED_RESOURCES this config may be provided (and the job will use
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES,
this config must be provided. For more details about the
BatchDedicatedResources, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
For more details about the machine spec, see
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec'
isOptional: true
parameterType: STRING
manual_batch_tuning_parameters_batch_size:
defaultValue: 0.0
description: 'The number of
the records (e.g. instances) of the operation given in each batch to a
machine replica. Machine type, and size of a single record should be
considered when setting this parameter, higher value speeds up the
batch operation''s execution, but too high value will result in a whole
batch not fitting in a machine''s memory, and the whole operation will
fail. The default value is 4.'
isOptional: true
parameterType: NUMBER_INTEGER
max_replica_count:
defaultValue: 0.0
description: 'The maximum number of machine replicas the batch operation
may be scaled
to. Only used if `machine_type` is set. Default is 10.'
isOptional: true
parameterType: NUMBER_INTEGER
model_parameters:
defaultValue: {}
          description: The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's `parameters_schema_uri`.
isOptional: true
parameterType: STRUCT
predictions_format:
defaultValue: jsonl
description: "The format in which Vertex AI gives the predictions. Must\
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig."
isOptional: true
parameterType: STRING
project:
description: Project to create the BatchPredictionJob.
parameterType: STRING
starting_replica_count:
defaultValue: 0.0
description: 'The number of machine replicas
used at the start of the batch operation. If not set, Vertex AI
decides starting number, not greater than `max_replica_count`. Only
used if `machine_type` is set.'
isOptional: true
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
batchpredictionjob:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table
instead.**] Artifact
representation of the created batch prediction job.'
bigquery_output_table:
artifactType:
schemaTitle: google.BQTable
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
bigquery_output_table is specified.'
gcs_output_directory:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: 'Artifact tracking the batch prediction job output. This is
only
available if
gcs_destination_output_uri_prefix is specified.'
parameters:
gcp_resources:
description: 'Serialized gcp_resources proto tracking the batch prediction
job.
For more details, see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
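# NOTE (illustrative, not part of the compiled spec): the comp-model-batch-predict-*
# components above are normally produced from the KFP DSL via the Google Cloud
# Pipeline Components wrapper. A minimal sketch, assuming GCPC v1 is installed and a
# JSONL input on GCS; the bucket paths and display name are hypothetical, and exact
# defaults may differ between GCPC releases.
#
#   from kfp import dsl
#   from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
#
#   @dsl.pipeline(name='batch-predict-sketch')
#   def batch_predict_sketch(project: str, location: str = 'us-central1'):
#       # In the real pipeline the `model` input comes from an upstream model-upload task.
#       ModelBatchPredictOp(
#           project=project,
#           location=location,
#           job_display_name='example-batch-predict',                       # hypothetical
#           instances_format='jsonl',
#           gcs_source_uris=['gs://example-bucket/instances.jsonl'],        # hypothetical
#           gcs_destination_output_uri_prefix='gs://example-bucket/output', # hypothetical
#           predictions_format='jsonl',
#           machine_type='n1-standard-16',
#           starting_replica_count=1,
#           max_replica_count=10,
#       )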
comp-model-evaluation:
executorLabel: exec-model-evaluation
inputDefinitions:
artifacts:
batch_prediction_job:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
parameters:
dataflow_disk_size:
defaultValue: 50.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 100.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 10.0
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
example_weight_column:
defaultValue: ''
isOptional: true
parameterType: STRING
ground_truth_column:
parameterType: STRING
ground_truth_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
prediction_id_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_label_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_score_column:
defaultValue: ''
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
problem_type:
parameterType: STRING
project:
parameterType: STRING
root_dir:
parameterType: STRING
outputDefinitions:
artifacts:
evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-evaluation-2:
executorLabel: exec-model-evaluation-2
inputDefinitions:
artifacts:
batch_prediction_job:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
parameters:
dataflow_disk_size:
defaultValue: 50.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 100.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 10.0
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
example_weight_column:
defaultValue: ''
isOptional: true
parameterType: STRING
ground_truth_column:
parameterType: STRING
ground_truth_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
prediction_id_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_label_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_score_column:
defaultValue: ''
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
problem_type:
parameterType: STRING
project:
parameterType: STRING
root_dir:
parameterType: STRING
outputDefinitions:
artifacts:
evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-evaluation-3:
executorLabel: exec-model-evaluation-3
inputDefinitions:
artifacts:
batch_prediction_job:
artifactType:
schemaTitle: google.VertexBatchPredictionJob
schemaVersion: 0.0.1
parameters:
dataflow_disk_size:
defaultValue: 50.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-4
isOptional: true
parameterType: STRING
dataflow_max_workers_num:
defaultValue: 100.0
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
isOptional: true
parameterType: BOOLEAN
dataflow_workers_num:
defaultValue: 10.0
isOptional: true
parameterType: NUMBER_INTEGER
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
example_weight_column:
defaultValue: ''
isOptional: true
parameterType: STRING
ground_truth_column:
parameterType: STRING
ground_truth_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
prediction_id_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_label_column:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_score_column:
defaultValue: ''
isOptional: true
parameterType: STRING
predictions_format:
defaultValue: jsonl
isOptional: true
parameterType: STRING
problem_type:
parameterType: STRING
project:
parameterType: STRING
root_dir:
parameterType: STRING
outputDefinitions:
artifacts:
evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
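# NOTE (illustrative, not part of the compiled spec): the comp-model-evaluation-*
# components above compute metrics over a finished batch prediction job using a
# Dataflow job. A minimal sketch of wiring one of them from the DSL, assuming the
# component definition has been saved to a standalone file; the file name, bucket
# path, and column name are hypothetical.
#
#   from kfp import components
#
#   model_evaluation_op = components.load_component_from_file('model_evaluation.yaml')
#
#   # Inside a @dsl.pipeline function, mirroring the inputDefinitions above:
#   eval_task = model_evaluation_op(
#       project=project,
#       location='us-central1',
#       root_dir='gs://example-bucket/root',
#       problem_type='classification',
#       ground_truth_column='target',
#       batch_prediction_job=batch_predict_task.outputs['batchpredictionjob'],
#       dataflow_machine_type='n1-standard-4',
#       dataflow_max_workers_num=100,
#   )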
comp-model-evaluation-import:
executorLabel: exec-model-evaluation-import
inputDefinitions:
artifacts:
classification_metrics:
artifactType:
schemaTitle: google.ClassificationMetrics
schemaVersion: 0.0.1
description: 'Path of classification metrics generated from the
classification evaluation component.'
isOptional: true
explanation:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'Path for model explanation metrics generated from an evaluation
component.'
isOptional: true
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'The feature attributions metrics artifact generated
from the feature attribution component.'
isOptional: true
forecasting_metrics:
artifactType:
schemaTitle: google.ForecastingMetrics
schemaVersion: 0.0.1
description: 'Path of forecasting metrics generated from the
forecasting evaluation component.'
isOptional: true
metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: Path of metrics generated from an evaluation component.
isOptional: true
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'Vertex model resource that will be the parent resource of
the
uploaded evaluation.'
question_answering_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
regression_metrics:
artifactType:
schemaTitle: google.RegressionMetrics
schemaVersion: 0.0.1
description: 'Path of regression metrics generated from the regression
evaluation component.'
isOptional: true
summarization_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
text_generation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
parameters:
dataset_path:
defaultValue: ''
isOptional: true
parameterType: STRING
dataset_paths:
defaultValue: []
isOptional: true
parameterType: LIST
dataset_type:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
defaultValue: ''
description: The display name for the uploaded model evaluation resource.
isOptional: true
parameterType: STRING
problem_type:
description: 'The problem type of the metrics being imported to the
VertexModel. `classification`, `regression`, and `forecasting` are the
currently supported problem types. Must be provided when `metrics` is
provided.'
isOptional: true
parameterType: STRING
outputDefinitions:
parameters:
gcp_resources:
parameterType: STRING
comp-model-evaluation-import-2:
executorLabel: exec-model-evaluation-import-2
inputDefinitions:
artifacts:
classification_metrics:
artifactType:
schemaTitle: google.ClassificationMetrics
schemaVersion: 0.0.1
description: 'Path of classification metrics generated from the
classification evaluation component.'
isOptional: true
explanation:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'Path for model explanation metrics generated from an evaluation
component.'
isOptional: true
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'The feature attributions metrics artifact generated
from the feature attribution component.'
isOptional: true
forecasting_metrics:
artifactType:
schemaTitle: google.ForecastingMetrics
schemaVersion: 0.0.1
description: 'Path of forecasting metrics generated from the
forecasting evaluation component.'
isOptional: true
metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: Path of metrics generated from an evaluation component.
isOptional: true
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'Vertex model resource that will be the parent resource of
the
uploaded evaluation.'
question_answering_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
regression_metrics:
artifactType:
schemaTitle: google.RegressionMetrics
schemaVersion: 0.0.1
description: 'Path of regression metrics generated from the regression
evaluation component.'
isOptional: true
summarization_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
text_generation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
parameters:
dataset_path:
defaultValue: ''
isOptional: true
parameterType: STRING
dataset_paths:
defaultValue: []
isOptional: true
parameterType: LIST
dataset_type:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
defaultValue: ''
description: The display name for the uploaded model evaluation resource.
isOptional: true
parameterType: STRING
problem_type:
description: 'The problem type of the metrics being imported to the
VertexModel. `classification`, `regression`, and `forecasting` are the
currently supported problem types. Must be provided when `metrics` is
provided.'
isOptional: true
parameterType: STRING
outputDefinitions:
parameters:
gcp_resources:
parameterType: STRING
comp-model-evaluation-import-3:
executorLabel: exec-model-evaluation-import-3
inputDefinitions:
artifacts:
classification_metrics:
artifactType:
schemaTitle: google.ClassificationMetrics
schemaVersion: 0.0.1
description: 'Path of classification metrics generated from the
classification evaluation component.'
isOptional: true
explanation:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'Path for model explanation metrics generated from an evaluation
component.'
isOptional: true
feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: 'The feature attributions metrics artifact generated
from the feature attribution component.'
isOptional: true
forecasting_metrics:
artifactType:
schemaTitle: google.ForecastingMetrics
schemaVersion: 0.0.1
description: 'Path of forecasting metrics generated from the
forecasting evaluation component.'
isOptional: true
metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
description: Path of metrics generated from an evaluation component.
isOptional: true
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
description: 'Vertex model resource that will be the parent resource of
the
uploaded evaluation.'
question_answering_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
regression_metrics:
artifactType:
schemaTitle: google.RegressionMetrics
schemaVersion: 0.0.1
description: 'Path of regression metrics generated from the regression
evaluation component.'
isOptional: true
summarization_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
text_generation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
isOptional: true
parameters:
dataset_path:
defaultValue: ''
isOptional: true
parameterType: STRING
dataset_paths:
defaultValue: []
isOptional: true
parameterType: LIST
dataset_type:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
defaultValue: ''
description: The display name for the uploaded model evaluation resource.
isOptional: true
parameterType: STRING
problem_type:
description: 'The problem type of the metrics being imported to the
VertexModel. `classification`, `regression`, and `forecasting` are the
currently supported problem types. Must be provided when `metrics` is
provided.'
isOptional: true
parameterType: STRING
outputDefinitions:
parameters:
gcp_resources:
parameterType: STRING
comp-model-upload:
executorLabel: exec-model-upload
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
parent_model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
description:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
project:
parameterType: STRING
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-upload-2:
executorLabel: exec-model-upload-2
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
parent_model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
description:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
project:
parameterType: STRING
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
comp-model-upload-3:
executorLabel: exec-model-upload-3
inputDefinitions:
artifacts:
explanation_metadata_artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
parent_model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
isOptional: true
unmanaged_container_model:
artifactType:
schemaTitle: google.UnmanagedContainerModel
schemaVersion: 0.0.1
isOptional: true
parameters:
description:
defaultValue: ''
isOptional: true
parameterType: STRING
display_name:
parameterType: STRING
encryption_spec_key_name:
defaultValue: ''
isOptional: true
parameterType: STRING
explanation_metadata:
defaultValue: {}
isOptional: true
parameterType: STRUCT
explanation_parameters:
defaultValue: {}
isOptional: true
parameterType: STRUCT
labels:
defaultValue: {}
isOptional: true
parameterType: STRUCT
location:
defaultValue: us-central1
isOptional: true
parameterType: STRING
project:
parameterType: STRING
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
parameters:
gcp_resources:
parameterType: STRING
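# NOTE (illustrative, not part of the compiled spec): the comp-model-upload-*
# components above correspond to the GCPC ModelUploadOp wrapper. A minimal sketch,
# assuming GCPC v1; the display name, description, and labels are hypothetical, and
# `unmanaged_container_model` would normally be the artifact produced by the ensemble step.
#
#   from google_cloud_pipeline_components.v1.model import ModelUploadOp
#
#   upload_task = ModelUploadOp(
#       project=project,
#       location='us-central1',
#       display_name='automl-tabular-model',     # hypothetical
#       description='AutoML Tabular ensemble',   # hypothetical
#       unmanaged_container_model=ensemble_task.outputs['unmanaged_container_model'],
#       labels={'team': 'example'},              # hypothetical
#   )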
comp-read-input-uri:
executorLabel: exec-read-input-uri
inputDefinitions:
artifacts:
split_uri:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
          description: The path to the file that contains Dataset data.
outputDefinitions:
parameters:
Output:
parameterType: LIST
comp-read-input-uri-2:
executorLabel: exec-read-input-uri-2
inputDefinitions:
artifacts:
split_uri:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
          description: The path to the file that contains Dataset data.
outputDefinitions:
parameters:
Output:
parameterType: LIST
comp-set-optional-inputs:
executorLabel: exec-set-optional-inputs
inputDefinitions:
artifacts:
vertex_dataset:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The Vertex dataset when data source is Vertex dataset.
parameters:
data_source_bigquery_table_path:
description: The BigQuery table when data source is BQ.
parameterType: STRING
data_source_csv_filenames:
description: The CSV GCS path when data source is CSV.
parameterType: STRING
location:
description: The GCP region that runs the pipeline components.
parameterType: STRING
model_display_name:
description: The uploaded model's display name.
parameterType: STRING
project:
description: The GCP project that runs the pipeline components.
parameterType: STRING
outputDefinitions:
parameters:
data_source_bigquery_table_path:
parameterType: STRING
data_source_csv_filenames:
parameterType: STRING
model_display_name:
parameterType: STRING
comp-string-not-empty:
executorLabel: exec-string-not-empty
inputDefinitions:
parameters:
value:
description: String value to be checked.
parameterType: STRING
outputDefinitions:
parameters:
Output:
parameterType: STRING
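# NOTE (illustrative, not part of the compiled spec): comp-string-not-empty and
# comp-set-optional-inputs are lightweight Python components used for control flow,
# e.g. choosing between a provided stage-1 tuning result and a fresh tuning run.
# A sketch of how such a check can be authored, assuming the 'true'/'false' string
# convention used with dsl.Condition; the body of the original component may differ.
#
#   from kfp import dsl
#
#   @dsl.component(base_image='python:3.9')
#   def string_not_empty(value: str) -> str:
#       # Returns 'true' when the string is non-empty so the result can drive
#       # a dsl.Condition(... == 'true') branch in the pipeline.
#       return 'true' if value else 'false'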
comp-tabular-stats-and-example-gen:
executorLabel: exec-tabular-stats-and-example-gen
inputDefinitions:
parameters:
additional_experiments:
defaultValue: ''
isOptional: true
parameterType: STRING
additional_experiments_json:
defaultValue: {}
isOptional: true
parameterType: STRUCT
data_source_bigquery_table_path:
defaultValue: ''
isOptional: true
parameterType: STRING
data_source_csv_filenames:
defaultValue: ''
isOptional: true
parameterType: STRING
dataflow_disk_size_gb:
defaultValue: 40.0
description: 'The disk size, in gigabytes, to use
on each Dataflow worker instance. If not set, default to 40.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_machine_type:
defaultValue: n1-standard-16
description: 'The machine type used for dataflow
jobs. If not set, default to n1-standard-16.'
isOptional: true
parameterType: STRING
dataflow_max_num_workers:
defaultValue: 25.0
description: 'The number of workers to run the
dataflow job. If not set, default to 25.'
isOptional: true
parameterType: NUMBER_INTEGER
dataflow_service_account:
defaultValue: ''
description: 'Custom service account to run
dataflow jobs.'
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork
name, when empty the default subnetwork will be used. More
details:
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow
workers use public IP addresses.'
isOptional: true
parameterType: BOOLEAN
enable_probabilistic_inference:
defaultValue: false
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: Customer-managed encryption key.
isOptional: true
parameterType: STRING
location:
description: 'Location for running dataset statistics and example
generation.'
parameterType: STRING
optimization_objective:
defaultValue: ''
description: "Objective function the model is optimizing\ntowards. The training\
\ process creates a model that maximizes/minimizes\nthe value of the objective\
\ function over the validation set. The\nsupported optimization objectives\
\ depend on the prediction type. If the\nfield is not set, a default objective\
\ function is used.\n classification: \"maximize-au-roc\" (default) -\
\ Maximize the\n area under the receiver operating characteristic (ROC)\
\ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\
\ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\
\ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\
\ - Maximize recall for a\n specified precision value.\n classification\
\ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\
\ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\
\ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\
\ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)."
isOptional: true
parameterType: STRING
optimization_objective_precision_value:
defaultValue: -1.0
description: 'Required when
optimization_objective is "maximize-recall-at-precision". Must be
between 0 and 1, inclusive.'
isOptional: true
parameterType: NUMBER_DOUBLE
optimization_objective_recall_value:
defaultValue: -1.0
description: 'Required when
optimization_objective is "maximize-precision-at-recall". Must be
between 0 and 1, inclusive.'
isOptional: true
parameterType: NUMBER_DOUBLE
predefined_split_key:
defaultValue: ''
isOptional: true
parameterType: STRING
prediction_type:
description: 'The prediction type. Supported values:
"classification", "regression".'
parameterType: STRING
project:
description: 'Project to run dataset statistics and example
generation.'
parameterType: STRING
quantiles:
defaultValue: []
isOptional: true
parameterType: LIST
request_type:
defaultValue: COLUMN_STATS_ONLY
isOptional: true
parameterType: STRING
root_dir:
description: The Cloud Storage location to store the output.
parameterType: STRING
run_distillation:
defaultValue: false
description: 'True if in distillation mode. The default value
is false.'
isOptional: true
parameterType: BOOLEAN
stratified_split_key:
defaultValue: ''
isOptional: true
parameterType: STRING
target_column_name:
description: The target column name.
parameterType: STRING
test_fraction:
defaultValue: -1.0
isOptional: true
parameterType: NUMBER_DOUBLE
timestamp_split_key:
defaultValue: ''
isOptional: true
parameterType: STRING
training_fraction:
defaultValue: -1.0
isOptional: true
parameterType: NUMBER_DOUBLE
transformations:
description: 'Quote escaped JSON string for transformations. Each
transformation will apply transform function to given input column. And
the result will be used for training. When creating transformation for
BigQuery Struct column, the column should be flattened using "." as the
delimiter.'
parameterType: STRING
transformations_path:
defaultValue: ''
description: 'Path to a GCS file containing JSON
string for transformations.'
isOptional: true
parameterType: STRING
validation_fraction:
defaultValue: -1.0
isOptional: true
parameterType: NUMBER_DOUBLE
weight_column_name:
defaultValue: ''
description: The weight column name.
isOptional: true
parameterType: STRING
outputDefinitions:
artifacts:
dataset_schema:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The schema of the dataset.
dataset_stats:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The stats of the dataset.
eval_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The eval split.
instance_baseline:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The instance baseline used to calculate explanations.
metadata:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The tabular example gen metadata.
test_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The test split.
train_split:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
description: The train split.
parameters:
downsampled_test_split_json:
description: The downsampled test split JSON object.
parameterType: LIST
gcp_resources:
description: 'GCP resources created by this component. For more details,
see
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.'
parameterType: STRING
test_split_json:
description: The test split JSON object.
parameterType: LIST
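# NOTE (illustrative, not part of the compiled spec): the `transformations` input of
# comp-tabular-stats-and-example-gen is a quote-escaped JSON string with one entry per
# column. A sketch of building it in Python, assuming the per-column "auto"/"categorical"
# transformation form; the column names are hypothetical and the full schema is defined
# by the AutoML Tables service.
#
#   import json
#
#   transformations = json.dumps([
#       {'auto': {'column_name': 'age'}},
#       {'auto': {'column_name': 'income'}},
#       {'categorical': {'column_name': 'city'}},
#   ])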
comp-write-bp-result-path:
executorLabel: exec-write-bp-result-path
inputDefinitions:
artifacts:
bp_job:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The batch prediction job artifact.
outputDefinitions:
artifacts:
result:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
comp-write-bp-result-path-2:
executorLabel: exec-write-bp-result-path-2
inputDefinitions:
artifacts:
bp_job:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The batch prediction job artifact.
outputDefinitions:
artifacts:
result:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
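# The component interface definitions end here; the deploymentSpec below binds each
# executorLabel to a concrete container invocation. Files like this one are produced
# by the KFP compiler. A minimal sketch, assuming a pipeline function named
# `automl_tabular` is in scope (the name here is hypothetical):
#
#   from kfp import compiler
#
#   compiler.Compiler().compile(
#       pipeline_func=automl_tabular,
#       package_path='automl-tabular.yaml',
#   )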
deploymentSpec:
executors:
exec-automl-tabular-cv-trainer:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125",
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\",
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}",
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}",
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}",
"\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=",
"{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=",
"{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=",
"{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=",
"{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=",
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=",
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=",
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=",
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\",
\"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
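# NOTE (illustrative, not part of the compiled spec): the exec-automl-tabular-* executors
# pass a JSON payload assembled from literal strings and runtime placeholders (the "Concat"
# entries above) to the GCPC custom-job launcher. A rough sketch of how such a container
# executor can be authored in the DSL; this is not the actual GCPC source, and the payload
# shown is a trivial stand-in.
#
#   from kfp import dsl
#
#   @dsl.container_component
#   def custom_job_launcher(project: str, gcp_resources: dsl.OutputPath(str)):
#       return dsl.ContainerSpec(
#           image='gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44',
#           command=['python3', '-u', '-m',
#                    'google_cloud_pipeline_components.container.v1.custom_job.launcher'],
#           args=['--type', 'CustomJob',
#                 '--project', project,
#                 '--gcp_resources', gcp_resources,
#                 # ConcatPlaceholder splices literals and inputs, like "Concat" above.
#                 '--payload', dsl.ConcatPlaceholder(['{"display_name": "', project, '"}'])],
#       )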
exec-automl-tabular-cv-trainer-2:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125",
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\",
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}",
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}",
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}",
"\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=",
"{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=",
"{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=",
"{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=",
"{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=",
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=",
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=",
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=",
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\",
\"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-ensemble:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\",
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}",
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\",
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}",
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}",
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}",
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\",
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125",
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=",
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=",
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}",
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}",
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-ensemble-2:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\",
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}",
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\",
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}",
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}",
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}",
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\",
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125",
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=",
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=",
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}",
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}",
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-ensemble-3:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\",
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}",
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\",
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}",
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}",
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}",
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\",
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125",
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=",
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=",
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}",
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}",
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-finalizer:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-infra-validator:
container:
args:
- --executor_input
- '{{$}}'
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125
resources:
cpuLimit: 8.0
memoryLimit: 52.0
exec-automl-tabular-infra-validator-2:
container:
args:
- --executor_input
- '{{$}}'
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125
resources:
cpuLimit: 8.0
memoryLimit: 52.0
exec-automl-tabular-infra-validator-3:
container:
args:
- --executor_input
- '{{$}}'
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125
resources:
cpuLimit: 8.0
memoryLimit: 52.0
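# NOTE (illustrative, not part of the compiled spec): the cpuLimit/memoryLimit values on the
# infra-validator executors above are what the DSL per-task resource methods compile down to.
# A sketch, assuming `task` is the PipelineTask returned by invoking any component:
#
#   task.set_cpu_limit('8')
#   task.set_memory_limit('52G')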
exec-automl-tabular-stage-1-tuner:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125",
"\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}",
"\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}",
"\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}",
"\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}",
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\",
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}",
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}",
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}",
"\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}",
"\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}",
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=",
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=",
"{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=",
"{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=",
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=",
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=",
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=",
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\",
\"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-stage-1-tuner-2:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}",
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125",
"\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}",
"\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}",
"\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}",
"\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}",
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\",
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}",
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}",
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}",
"\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}",
"\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}",
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=",
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=",
"{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=",
"{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=",
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=",
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=",
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=",
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\",
\"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
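    # The transform executors below run the same training image with the "transform" command:
    # they materialize the train/eval/test splits and the training schema, writing outputs under
    # <root_dir>/<pipeline_job_uuid>/<pipeline_task_uuid>/transform*. The Dataflow job shape
    # (machine type, max workers, disk size, subnetwork, public-IP usage, service account, and
    # KMS key) is passed straight through from the pipeline's dataflow_* inputs.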
exec-automl-tabular-transform:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=",
"{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\",
\"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=",
"{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=",
"{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}",
"\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\",
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}",
"\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}",
"\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}",
"\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}",
"\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}",
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\",
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\",
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}",
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}",
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125",
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}",
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}",
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}",
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}",
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}",
"\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
exec-automl-tabular-transform-2:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=",
"{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=",
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\",
\"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\",
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=",
"{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=",
"{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}",
"\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\",
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}",
"\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}",
"\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}",
"\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}",
"\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}",
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\",
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\",
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}",
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}",
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125",
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}",
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}",
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}",
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}",
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}",
"\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
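    # bool-identity (and its -2/-3 copies) is a lightweight KFP Python component
    # (kfp==2.0.0-beta.17 on python:3.7): it simply converts a boolean input into the string
    # 'true' or 'false', presumably so that downstream condition checks can branch on it.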
exec-bool-identity:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _bool_identity
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
image: python:3.7
exec-bool-identity-2:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _bool_identity
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
image: python:3.7
exec-bool-identity-3:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _bool_identity
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n"
image: python:3.7
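    # calculate-training-parameters derives the stage-1/stage-2 time budgets from
    # train_budget_milli_node_hours. As an illustrative worked example (not a pipeline default):
    # with a 1000 milli-node-hour budget and 35 parallel trials in each stage,
    # hours = 1.0, multiplier = 35 * 1.0 / 500 = 0.07,
    # stage_1_single_run_max_secs = int(sqrt(0.07) * 2400) = 634, phase_2_rounds = 1,
    # stage_1_deadline_secs = int(3600 - 1.3 * 634) = 2775 (~0.77 h), leaving ~0.23 h for stage 2,
    # stage_1_num_selected_trials = int(1 * 35 / 5) = 7, and reduce_search_space_mode = 'minimal'.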
exec-calculate-training-parameters:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _calculate_training_parameters
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\
\ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\
\ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\
\ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\
\ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\
\ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\
\ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\
\ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\
\"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\
\ Number of parallel trials for stage 1.\n train_budget_milli_node_hours:\
\ The train budget of creating this model,\n expressed in milli node\
\ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\
\ Number of parallel trials for stage 2.\n run_distillation: Whether\
\ to run distill in the training pipeline.\n is_skip_architecture_search:\
\ If component is being called in the\n skip_architecture_search pipeline.\n\
\ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\
\ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\
\ stage_1_num_selected_trials: Number of selected trials for stage\
\ 1.\n stage_1_single_run_max_secs: Maximum number of seconds for a\
\ single stage\n 1\n training trial.\n stage_2_deadline_hours:\
\ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\
\ Maximum number of seconds for a single stage\n 2\n training\
\ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\
\ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\
\ The reduce search space mode. Possible values:\n minimal, regular,\
\ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\
\ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\
\ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\
\ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\
\ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\
\ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\
\ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\
\ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\
\ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\
\ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\
\ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\
\ # All of magic number \"1.3\" above is because the trial doesn't\n\
\ # always finish in time_per_trial. 1.3 is an empirical safety margin\
\ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\
\ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\
\ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\
\ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\
\ case. Phase 2\n # can't finish in time after the deadline is cut,\
\ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\
\ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \
\ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\
\ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \
\ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\
\ is stage_1_num_selected_trials *\n # num_folds, which should be equal\
\ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\
\ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\
\ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\
\ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\
\ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\
\ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\
\ of magic number \"1.3\" above is because the trial doesn't always\n \
\ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\
\ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\
\ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\
\ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \
\ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\
\ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\
\ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\
\ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \
\ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\
\ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\
\ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\
\ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\
\ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \
\ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\
\ reduce_search_space_mode,\n )\n\n"
image: python:3.7
exec-calculate-training-parameters-2:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _calculate_training_parameters
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\
\ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\
\ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\
\ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\
\ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\
\ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\
\ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\
\ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\
\"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\
\ Number of parallel trials for stage 1.\n train_budget_milli_node_hours:\
\ The train budget of creating this model,\n expressed in milli node\
\ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\
\ Number of parallel trials for stage 2.\n run_distillation: Whether\
\ to run distill in the training pipeline.\n is_skip_architecture_search:\
\ If component is being called in the\n skip_architecture_search pipeline.\n\
\ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\
\ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\
\ stage_1_num_selected_trials: Number of selected trials for stage\
\ 1.\n stage_1_single_run_max_secs: Maximum number of seconds for a\
\ single stage\n 1\n training trial.\n stage_2_deadline_hours:\
\ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\
\ Maximum number of seconds for a single stage\n 2\n training\
\ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\
\ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\
\ The reduce search space mode. Possible values:\n minimal, regular,\
\ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\
\ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\
\ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\
\ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\
\ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\
\ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\
\ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\
\ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\
\ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\
\ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\
\ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\
\ # All of magic number \"1.3\" above is because the trial doesn't\n\
\ # always finish in time_per_trial. 1.3 is an empirical safety margin\
\ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\
\ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\
\ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\
\ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\
\ case. Phase 2\n # can't finish in time after the deadline is cut,\
\ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\
\ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \
\ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\
\ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \
\ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\
\ is stage_1_num_selected_trials *\n # num_folds, which should be equal\
\ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\
\ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\
\ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\
\ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\
\ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\
\ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\
\ of magic number \"1.3\" above is because the trial doesn't always\n \
\ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\
\ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\
\ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\
\ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \
\ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\
\ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\
\ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\
\ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \
\ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\
\ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\
\ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\
\ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\
\ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \
\ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\
\ reduce_search_space_mode,\n )\n\n"
image: python:3.7
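    # The feature-attribution executors below run the model-evaluation image (v0.9) with
    # --task explanation. They read the batch-prediction output from whichever source is present
    # (a GCS URI, or a bq://project.dataset.table string built from the BigQuery artifact
    # metadata), compute attributions on Dataflow (or the direct runner when force_direct_runner
    # is set), and write the result to the feature_attributions artifact.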
exec-feature-attribution:
container:
args:
- --task
- explanation
- --setup_file
- /setup.py
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --root_dir
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source",
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}'
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source",
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}'
- --dataflow_job_prefix
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --force_direct_runner
- '{{$.inputs.parameters[''force_direct_runner'']}}'
- --gcs_output_path
- '{{$.outputs.artifacts[''feature_attributions''].path}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.9
exec-feature-attribution-2:
container:
args:
- --task
- explanation
- --setup_file
- /setup.py
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --root_dir
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source",
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}'
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source",
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}'
- --dataflow_job_prefix
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --force_direct_runner
- '{{$.inputs.parameters[''force_direct_runner'']}}'
- --gcs_output_path
- '{{$.outputs.artifacts[''feature_attributions''].path}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.9
exec-feature-attribution-3:
container:
args:
- --task
- explanation
- --setup_file
- /setup.py
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --root_dir
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source",
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}'
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source",
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}",
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}'
- --dataflow_job_prefix
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --force_direct_runner
- '{{$.inputs.parameters[''force_direct_runner'']}}'
- --gcs_output_path
- '{{$.outputs.artifacts[''feature_attributions''].path}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.9
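    # The importer below brings an existing artifact into the pipeline: its URI is supplied at
    # runtime through the 'uri' parameter and it is typed as a generic system.Artifact.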
exec-importer:
importer:
artifactUri:
runtimeParameter: uri
typeSchema:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
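    # merge-materialized-splits is another lightweight Python component: it reads two
    # MaterializedSplit files and writes a single output file containing both, joined by a comma.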
exec-merge-materialized-splits:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _merge_materialized_splits
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\
\ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\
):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\
\ first materialized split.\n split_1: The second materialized split.\n\
\ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\
\ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\
\ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\
\ f.write(','.join([split_0_content, split_1_content]))\n\n"
image: python:3.7
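    # The model-batch-explanation executors below assemble a BatchPredictionJob payload with
    # generate_explanation plus an explanation_spec (parameters, metadata, and the
    # explanation_metadata_artifact URI) and submit it through the private AutoML Tables
    # launcher image (1.0.13).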
exec-model-batch-explanation:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}",
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13
exec-model-batch-explanation-2:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}",
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13
exec-model-batch-explanation-3:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}",
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13
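    # The model-batch-predict executors below build a similar BatchPredictionJob payload but
    # submit it via the GA google_cloud_pipeline_components batch_prediction_job launcher
    # (2.0.0b4). Optional pieces (the model resource name, included_fields, excluded_fields)
    # are added only when the corresponding inputs are present, per the "IfPresent" clauses.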
exec-model-batch-predict:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\":
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}},
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}",
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\"
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [",
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}},
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\":
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\":
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\":
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4
exec-model-batch-predict-2:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\":
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}},
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}",
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\"
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [",
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}},
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\":
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\":
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\":
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4
exec-model-batch-predict-3:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\":
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}},
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}",
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\"
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [",
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}},
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\":
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\":
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\":
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4
exec-model-batch-predict-4:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\":
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}},
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}",
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\"
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [",
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}},
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\":
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\":
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\":
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4
exec-model-batch-predict-5:
container:
args:
- --type
- BatchPredictionJob
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}",
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\":
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}},
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}",
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}",
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}",
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}",
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\"
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [",
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}},
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\":
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\":
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\":
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}",
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}",
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}",
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\":
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\":
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\":
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\":
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\":
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\":
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}",
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}",
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\":
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4
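# Illustrative note (not part of the compiled spec): the "Concat"/"IfPresent"
# payload above is assembled at runtime into a single BatchPredictionJob JSON
# body for the launcher, roughly shaped like:
#   {
#     "display_name": "...",
#     "model": "<model resourceName, included only when the model input is present>",
#     "input_config": {"instances_format": "...", "gcs_source": {...}, "bigquery_source": {...}},
#     "instance_config": {"instance_type": "...", "key_field": "...", ...},
#     "model_parameters": {...},
#     "output_config": {"predictions_format": "...", "gcs_destination": {...}, "bigquery_destination": {...}},
#     "dedicated_resources": {"machine_spec": {...}, "starting_replica_count": ..., "max_replica_count": ...},
#     "manual_batch_tuning_parameters": {"batch_size": ...},
#     "generate_explanation": ..., "explanation_spec": {...},
#     "labels": {...}, "encryption_spec": {"kms_key_name": "..."}
#   }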
exec-model-evaluation:
container:
args:
- --setup_file
- /setup.py
- --json_mode
- 'true'
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --problem_type
- '{{$.inputs.parameters[''problem_type'']}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- --batch_prediction_gcs_source
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}'
- --ground_truth_format
- '{{$.inputs.parameters[''ground_truth_format'']}}'
- --key_prefix_in_prediction_dataset
- instance
- --root_dir
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --classification_type
- multiclass
- --ground_truth_column
- instance.{{$.inputs.parameters['ground_truth_column']}}
- --prediction_score_column
- '{{$.inputs.parameters[''prediction_score_column'']}}'
- --prediction_label_column
- '{{$.inputs.parameters[''prediction_label_column'']}}'
- --prediction_id_column
- ''
- --example_weight_column
- ''
- --generate_feature_attribution
- 'false'
- --dataflow_job_prefix
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --output_metrics_gcs_path
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.4
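# Illustrative note: this evaluation component launches a Dataflow-backed
# metrics job. It reads batch prediction results from the upstream job's
# gcsOutputDirectory, resolves ground-truth values under the "instance." key
# prefix (hence --ground_truth_column is written as instance.<column>), and
# writes the computed metrics to the evaluation_metrics artifact URI while
# recording launched resources in gcp_resources.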
exec-model-evaluation-2:
container:
args:
- --setup_file
- /setup.py
- --json_mode
- 'true'
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --problem_type
- '{{$.inputs.parameters[''problem_type'']}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- --batch_prediction_gcs_source
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}'
- --ground_truth_format
- '{{$.inputs.parameters[''ground_truth_format'']}}'
- --key_prefix_in_prediction_dataset
- instance
- --root_dir
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --classification_type
- multiclass
- --ground_truth_column
- instance.{{$.inputs.parameters['ground_truth_column']}}
- --prediction_score_column
- '{{$.inputs.parameters[''prediction_score_column'']}}'
- --prediction_label_column
- '{{$.inputs.parameters[''prediction_label_column'']}}'
- --prediction_id_column
- ''
- --example_weight_column
- ''
- --generate_feature_attribution
- 'false'
- --dataflow_job_prefix
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --output_metrics_gcs_path
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.4
exec-model-evaluation-3:
container:
args:
- --setup_file
- /setup.py
- --json_mode
- 'true'
- --project_id
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --problem_type
- '{{$.inputs.parameters[''problem_type'']}}'
- --batch_prediction_format
- '{{$.inputs.parameters[''predictions_format'']}}'
- --batch_prediction_gcs_source
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}'
- --ground_truth_format
- '{{$.inputs.parameters[''ground_truth_format'']}}'
- --key_prefix_in_prediction_dataset
- instance
- --root_dir
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}'
- --classification_type
- multiclass
- --ground_truth_column
- instance.{{$.inputs.parameters['ground_truth_column']}}
- --prediction_score_column
- '{{$.inputs.parameters[''prediction_score_column'']}}'
- --prediction_label_column
- '{{$.inputs.parameters[''prediction_label_column'']}}'
- --prediction_id_column
- ''
- --example_weight_column
- ''
- --generate_feature_attribution
- 'false'
- --dataflow_job_prefix
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}
- --dataflow_service_account
- '{{$.inputs.parameters[''dataflow_service_account'']}}'
- --dataflow_disk_size
- '{{$.inputs.parameters[''dataflow_disk_size'']}}'
- --dataflow_machine_type
- '{{$.inputs.parameters[''dataflow_machine_type'']}}'
- --dataflow_workers_num
- '{{$.inputs.parameters[''dataflow_workers_num'']}}'
- --dataflow_max_workers_num
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}'
- --dataflow_subnetwork
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}'
- --dataflow_use_public_ips
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}'
- --kms_key_name
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}'
- --output_metrics_gcs_path
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
command:
- python
- /main.py
image: gcr.io/ml-pipeline/model-evaluation:v0.4
exec-model-evaluation-import:
container:
args:
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}",
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics",
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics",
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics",
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics",
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics",
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics",
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions",
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}'
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type",
"{{$.inputs.parameters[''problem_type'']}}"]}}'
- --display_name
- '{{$.inputs.parameters[''display_name'']}}'
- --dataset_path
- '{{$.inputs.parameters[''dataset_path'']}}'
- --dataset_paths
- '{{$.inputs.parameters[''dataset_paths'']}}'
- --dataset_type
- '{{$.inputs.parameters[''dataset_type'']}}'
- --pipeline_job_id
- '{{$.pipeline_job_uuid}}'
- --pipeline_job_resource_name
- '{{$.pipeline_job_resource_name}}'
- --model_name
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3
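# Illustrative note: each '{"IfPresent": ...}' argument above is a KFP
# placeholder. When the named input is supplied, the launcher receives the
# flags listed under "Then" (for example, a provided "metrics" artifact adds
# --metrics <uri> --metrics_explanation <explanation_gcs_path>); when the
# input is absent, the entire argument is dropped.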
exec-model-evaluation-import-2:
container:
args:
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}",
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics",
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics",
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics",
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics",
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics",
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics",
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions",
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}'
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type",
"{{$.inputs.parameters[''problem_type'']}}"]}}'
- --display_name
- '{{$.inputs.parameters[''display_name'']}}'
- --dataset_path
- '{{$.inputs.parameters[''dataset_path'']}}'
- --dataset_paths
- '{{$.inputs.parameters[''dataset_paths'']}}'
- --dataset_type
- '{{$.inputs.parameters[''dataset_type'']}}'
- --pipeline_job_id
- '{{$.pipeline_job_uuid}}'
- --pipeline_job_resource_name
- '{{$.pipeline_job_resource_name}}'
- --model_name
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3
exec-model-evaluation-import-3:
container:
args:
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}",
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}'
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics",
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics",
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics",
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics",
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics",
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics",
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}'
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions",
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}'
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type",
"{{$.inputs.parameters[''problem_type'']}}"]}}'
- --display_name
- '{{$.inputs.parameters[''display_name'']}}'
- --dataset_path
- '{{$.inputs.parameters[''dataset_path'']}}'
- --dataset_paths
- '{{$.inputs.parameters[''dataset_paths'']}}'
- --dataset_type
- '{{$.inputs.parameters[''dataset_type'']}}'
- --pipeline_job_id
- '{{$.pipeline_job_uuid}}'
- --pipeline_job_resource_name
- '{{$.pipeline_job_resource_name}}'
- --model_name
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3
exec-model-upload:
container:
args:
- --type
- UploadModel
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}",
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}",
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name",
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15
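# Illustrative note: the UploadModel payload above resolves at runtime to a
# JSON body roughly shaped like:
#   {
#     "display_name": "...", "description": "...",
#     "explanation_spec": {"parameters": {...}, "metadata": {...}},
#     "explanation_metadata_artifact": "<artifact uri>",
#     "encryption_spec": {"kms_key_name": "..."},
#     "labels": {...}
#   }
# and --parent_model_name is passed only when the parent_model input is present.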
exec-model-upload-2:
container:
args:
- --type
- UploadModel
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}",
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}",
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name",
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15
exec-model-upload-3:
container:
args:
- --type
- UploadModel
- --payload
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}",
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}",
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}",
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}",
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}",
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}'
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --executor_input
- '{{$}}'
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name",
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}'
command:
- python3
- -u
- -m
- launcher
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15
exec-read-input-uri:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _read_input_uri
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\
) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\
\ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\
\ The path to the file that contains Dataset data.\n\n Returns:\n The\
\ list of string that represents the batch prediction input files.\n \"\
\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\
\ return data_source['tf_record_data_source']['file_patterns']\n\n"
image: python:3.7
exec-read-input-uri-2:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _read_input_uri
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\
) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\
\ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\
\ The path to the file that contains Dataset data.\n\n Returns:\n The\
\ list of string that represents the batch prediction input files.\n \"\
\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\
\ return data_source['tf_record_data_source']['file_patterns']\n\n"
image: python:3.7
exec-set-optional-inputs:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _set_optional_inputs
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\
\ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\
\ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\
\ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\
\ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\
\ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\
\ The GCP project that runs the pipeline components.\n location: The\
\ GCP region that runs the pipeline components.\n data_source_csv_filenames:\
\ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\
\ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\
\ dataset when data source is Vertex dataset.\n model_display_name: The\
\ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\
\ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\ import collections\n from google.cloud import aiplatform\n from google.cloud\
\ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\
\n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\
\n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\
\ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\
\ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\
\ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\
\ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\
\ if 'gcsSource' in input_config:\n data_source_csv_filenames =\
\ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\
\ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\
\ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\
\ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\
\ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\
\ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\
\ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\
\ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\
\ data_source_bigquery_table_path,\n model_display_name,\n )\n\
\n"
image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125
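# Illustrative note: _set_optional_inputs resolves the data source from the
# Vertex dataset's metadata['inputConfig']. The two shapes it handles look
# roughly like:
#   {"gcsSource": {"uri": ["gs://bucket/data-1.csv", "gs://bucket/data-2.csv"]}}
#   {"bigquerySource": {"uri": "bq://project.dataset.table"}}
# GCS URIs are joined into data_source_csv_filenames; a BigQuery URI becomes
# data_source_bigquery_table_path. Bucket and table names here are placeholders.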
exec-string-not-empty:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _string_not_empty
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\
\ string value is not empty.\n\n Args:\n value: String value to be checked.\n\
\n Returns:\n Boolean value. -> 'true' if not empty, 'false' if empty.\
\ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\
\ \"\"\"\n return 'true' if value else 'false'\n\n"
image: python:3.7
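# Illustrative note: _string_not_empty returns the strings 'true'/'false'
# (rather than a bool, per the docstring's KFP-compiler caveat) so downstream
# condition tasks can gate optional branches by comparing its output against
# the literal 'true'.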
exec-tabular-stats-and-example-gen:
container:
args:
- --type
- CustomJob
- --project
- '{{$.inputs.parameters[''project'']}}'
- --location
- '{{$.inputs.parameters[''location'']}}'
- --gcp_resources
- '{{$.outputs.parameters[''gcp_resources''].output_file}}'
- --payload
- '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\":
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"",
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\",
\"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\":
\\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\":
\\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\":
\\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\":
\\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\":
", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\":
", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\":
", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\",
\"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}",
"\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}",
"\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}",
"\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}",
"\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}",
"\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}",
"\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}",
"\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}",
"\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}",
"\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}",
"\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\",
\"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}",
"\", \"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}",
"\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}",
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\",
\"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\",
\"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}",
"\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}",
"\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}",
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\",
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\",
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\",
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\",
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}",
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125",
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}",
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}",
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}",
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}",
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}",
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}",
"\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}",
"\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}",
"\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\",
\"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\",
\"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=",
"{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=",
"{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=",
"{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}",
"\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}",
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\",
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}",
"\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\",
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}'
command:
- python3
- -u
- -m
- google_cloud_pipeline_components.container.v1.custom_job.launcher
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44
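# Illustrative note: this step is launched as a Vertex CustomJob; the Concat
# payload above resolves roughly to:
#   {
#     "display_name": "tabular-stats-and-example-gen-<job>-<task>",
#     "encryption_spec": {"kms_key_name": "..."},
#     "job_spec": {"worker_pool_specs": [{
#       "replica_count": 1,
#       "machine_spec": {"machine_type": "n1-standard-8"},
#       "container_spec": {
#         "image_uri": "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125",
#         "args": ["stats_generator", "--train_spec=...", "--data_source_csv_filenames=...", ...]
#       }
#     }]}
#   }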
exec-write-bp-result-path:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _write_bp_result_path
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\
\ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\
\ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\
\ job artifact.\n result: The path to the file that contains Dataset\
\ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\
\ 'tf_record_data_source': {\n 'file_patterns': [\n \
\ f'{directory}/prediction.results-*',\n ],\n 'coder':\
\ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\
\n"
image: python:3.7
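# Illustrative note: _write_bp_result_path emits a small JSON file shaped as
# below, which the _read_input_uri component earlier in this spec reads back
# via data_source['tf_record_data_source']['file_patterns']:
#   {
#     "tf_record_data_source": {
#       "file_patterns": ["<gcsOutputDirectory>/prediction.results-*"],
#       "coder": "PROTO_VALUE"
#     }
#   }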
exec-write-bp-result-path-2:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- _write_bp_result_path
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\
\ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\
\ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\
\ job artifact.\n result: The path to the file that contains Dataset\
\ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\
\ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\
\ 'tf_record_data_source': {\n 'file_patterns': [\n \
\ f'{directory}/prediction.results-*',\n ],\n 'coder':\
\ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\
\n"
image: python:3.7
pipelineInfo:
description: The AutoML Tabular pipeline v1.
name: automl-tabular
root:
dag:
outputs:
artifacts:
feature-attribution-2-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-2-feature_attributions
producerSubtask: exit-handler-1
feature-attribution-3-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-3-feature_attributions
producerSubtask: exit-handler-1
feature-attribution-feature_attributions:
artifactSelectors:
- outputArtifactKey: feature-attribution-feature_attributions
producerSubtask: exit-handler-1
model-evaluation-2-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-2-evaluation_metrics
producerSubtask: exit-handler-1
model-evaluation-3-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-3-evaluation_metrics
producerSubtask: exit-handler-1
model-evaluation-evaluation_metrics:
artifactSelectors:
- outputArtifactKey: model-evaluation-evaluation_metrics
producerSubtask: exit-handler-1
tasks:
automl-tabular-finalizer:
cachingOptions:
enableCache: true
componentRef:
name: comp-automl-tabular-finalizer
dependentTasks:
- exit-handler-1
inputs:
parameters:
location:
componentInputParameter: location
project:
componentInputParameter: project
root_dir:
componentInputParameter: root_dir
taskInfo:
name: automl-tabular-finalizer
triggerPolicy:
strategy: ALL_UPSTREAM_TASKS_COMPLETED
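# Illustrative note: ALL_UPSTREAM_TASKS_COMPLETED makes the finalizer run once
# exit-handler-1 reaches a final state, whether it succeeded or failed, so it
# acts as the pipeline's cleanup/exit step.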
exit-handler-1:
componentRef:
name: comp-exit-handler-1
dependentTasks:
- set-optional-inputs
inputs:
artifacts:
pipelinechannel--parent_model:
componentInputArtifact: parent_model
parameters:
pipelinechannel--additional_experiments:
componentInputParameter: additional_experiments
pipelinechannel--cv_trainer_worker_pool_specs_override:
componentInputParameter: cv_trainer_worker_pool_specs_override
pipelinechannel--dataflow_service_account:
componentInputParameter: dataflow_service_account
pipelinechannel--dataflow_subnetwork:
componentInputParameter: dataflow_subnetwork
pipelinechannel--dataflow_use_public_ips:
componentInputParameter: dataflow_use_public_ips
pipelinechannel--disable_early_stopping:
componentInputParameter: disable_early_stopping
pipelinechannel--distill_batch_predict_machine_type:
componentInputParameter: distill_batch_predict_machine_type
pipelinechannel--distill_batch_predict_max_replica_count:
componentInputParameter: distill_batch_predict_max_replica_count
pipelinechannel--distill_batch_predict_starting_replica_count:
componentInputParameter: distill_batch_predict_starting_replica_count
pipelinechannel--enable_probabilistic_inference:
componentInputParameter: enable_probabilistic_inference
pipelinechannel--encryption_spec_key_name:
componentInputParameter: encryption_spec_key_name
pipelinechannel--evaluation_batch_explain_machine_type:
componentInputParameter: evaluation_batch_explain_machine_type
pipelinechannel--evaluation_batch_explain_max_replica_count:
componentInputParameter: evaluation_batch_explain_max_replica_count
pipelinechannel--evaluation_batch_explain_starting_replica_count:
componentInputParameter: evaluation_batch_explain_starting_replica_count
pipelinechannel--evaluation_batch_predict_machine_type:
componentInputParameter: evaluation_batch_predict_machine_type
pipelinechannel--evaluation_batch_predict_max_replica_count:
componentInputParameter: evaluation_batch_predict_max_replica_count
pipelinechannel--evaluation_batch_predict_starting_replica_count:
componentInputParameter: evaluation_batch_predict_starting_replica_count
pipelinechannel--evaluation_dataflow_disk_size_gb:
componentInputParameter: evaluation_dataflow_disk_size_gb
pipelinechannel--evaluation_dataflow_machine_type:
componentInputParameter: evaluation_dataflow_machine_type
pipelinechannel--evaluation_dataflow_max_num_workers:
componentInputParameter: evaluation_dataflow_max_num_workers
pipelinechannel--evaluation_dataflow_starting_num_workers:
componentInputParameter: evaluation_dataflow_starting_num_workers
pipelinechannel--export_additional_model_without_custom_ops:
componentInputParameter: export_additional_model_without_custom_ops
pipelinechannel--fast_testing:
componentInputParameter: fast_testing
pipelinechannel--location:
componentInputParameter: location
pipelinechannel--model_description:
componentInputParameter: model_description
pipelinechannel--optimization_objective:
componentInputParameter: optimization_objective
pipelinechannel--optimization_objective_precision_value:
componentInputParameter: optimization_objective_precision_value
pipelinechannel--optimization_objective_recall_value:
componentInputParameter: optimization_objective_recall_value
pipelinechannel--predefined_split_key:
componentInputParameter: predefined_split_key
pipelinechannel--prediction_type:
componentInputParameter: prediction_type
pipelinechannel--project:
componentInputParameter: project
pipelinechannel--quantiles:
componentInputParameter: quantiles
pipelinechannel--root_dir:
componentInputParameter: root_dir
pipelinechannel--run_distillation:
componentInputParameter: run_distillation
pipelinechannel--run_evaluation:
componentInputParameter: run_evaluation
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path:
taskOutputParameter:
outputParameterKey: data_source_bigquery_table_path
producerTask: set-optional-inputs
pipelinechannel--set-optional-inputs-data_source_csv_filenames:
taskOutputParameter:
outputParameterKey: data_source_csv_filenames
producerTask: set-optional-inputs
pipelinechannel--set-optional-inputs-model_display_name:
taskOutputParameter:
outputParameterKey: model_display_name
producerTask: set-optional-inputs
pipelinechannel--stage_1_num_parallel_trials:
componentInputParameter: stage_1_num_parallel_trials
pipelinechannel--stage_1_tuner_worker_pool_specs_override:
componentInputParameter: stage_1_tuner_worker_pool_specs_override
pipelinechannel--stage_1_tuning_result_artifact_uri:
componentInputParameter: stage_1_tuning_result_artifact_uri
pipelinechannel--stage_2_num_parallel_trials:
componentInputParameter: stage_2_num_parallel_trials
pipelinechannel--stage_2_num_selected_trials:
componentInputParameter: stage_2_num_selected_trials
pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb:
componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb
pipelinechannel--stats_and_example_gen_dataflow_machine_type:
componentInputParameter: stats_and_example_gen_dataflow_machine_type
pipelinechannel--stats_and_example_gen_dataflow_max_num_workers:
componentInputParameter: stats_and_example_gen_dataflow_max_num_workers
pipelinechannel--stratified_split_key:
componentInputParameter: stratified_split_key
pipelinechannel--study_spec_parameters_override:
componentInputParameter: study_spec_parameters_override
pipelinechannel--target_column:
componentInputParameter: target_column
pipelinechannel--test_fraction:
componentInputParameter: test_fraction
pipelinechannel--timestamp_split_key:
componentInputParameter: timestamp_split_key
pipelinechannel--train_budget_milli_node_hours:
componentInputParameter: train_budget_milli_node_hours
pipelinechannel--training_fraction:
componentInputParameter: training_fraction
pipelinechannel--transform_dataflow_disk_size_gb:
componentInputParameter: transform_dataflow_disk_size_gb
pipelinechannel--transform_dataflow_machine_type:
componentInputParameter: transform_dataflow_machine_type
pipelinechannel--transform_dataflow_max_num_workers:
componentInputParameter: transform_dataflow_max_num_workers
pipelinechannel--transformations:
componentInputParameter: transformations
pipelinechannel--validation_fraction:
componentInputParameter: validation_fraction
pipelinechannel--weight_column:
componentInputParameter: weight_column
taskInfo:
name: exit-handler-1
set-optional-inputs:
cachingOptions:
enableCache: true
componentRef:
name: comp-set-optional-inputs
inputs:
artifacts:
vertex_dataset:
componentInputArtifact: vertex_dataset
parameters:
data_source_bigquery_table_path:
componentInputParameter: data_source_bigquery_table_path
data_source_csv_filenames:
componentInputParameter: data_source_csv_filenames
location:
componentInputParameter: location
model_display_name:
componentInputParameter: model_display_name
project:
componentInputParameter: project
taskInfo:
name: set-optional-inputs
inputDefinitions:
artifacts:
parent_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: Vertex Model to upload this model as a version of.
isOptional: true
vertex_dataset:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
description: The Vertex dataset artifact.
parameters:
additional_experiments:
description: Use this field to configure private preview features.
isOptional: true
parameterType: STRUCT
cv_trainer_worker_pool_specs_override:
description: 'The dictionary for overriding the stage 2
cv trainer worker pool spec.'
isOptional: true
parameterType: LIST
data_source_bigquery_table_path:
defaultValue: ''
description: 'The BigQuery table path of format
bq://bq_project.bq_dataset.bq_table'
isOptional: true
parameterType: STRING
data_source_csv_filenames:
defaultValue: ''
description: 'A string that represents a list of comma-separated
CSV filenames.'
isOptional: true
parameterType: STRING
dataflow_service_account:
defaultValue: ''
description: Custom service account to run dataflow jobs.
isOptional: true
parameterType: STRING
dataflow_subnetwork:
defaultValue: ''
description: 'Dataflow''s fully qualified subnetwork name; when empty,
the default subnetwork will be used. Example:
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications'
isOptional: true
parameterType: STRING
dataflow_use_public_ips:
defaultValue: true
description: 'Specifies whether Dataflow workers use public IP
addresses.'
isOptional: true
parameterType: BOOLEAN
disable_early_stopping:
defaultValue: false
description: Whether to disable early stopping.
isOptional: true
parameterType: BOOLEAN
distill_batch_predict_machine_type:
defaultValue: n1-standard-16
description: 'The prediction server machine type for
batch predict component in the model distillation.'
isOptional: true
parameterType: STRING
distill_batch_predict_max_replica_count:
defaultValue: 25.0
description: 'The max number of prediction servers
for the batch predict component in the model distillation.'
isOptional: true
parameterType: NUMBER_INTEGER
distill_batch_predict_starting_replica_count:
defaultValue: 25.0
description: 'The initial number of
prediction servers for the batch predict component in the model distillation.'
isOptional: true
parameterType: NUMBER_INTEGER
enable_probabilistic_inference:
defaultValue: false
description: 'If probabilistic inference is enabled, the
model will fit a distribution that captures the uncertainty of a
prediction. At inference time, the predictive distribution is used to make
a point prediction that minimizes the optimization objective. For example,
the mean of a predictive distribution is the point prediction that
minimizes RMSE loss. If quantiles are specified, then the quantiles of the
distribution are also returned.'
isOptional: true
parameterType: BOOLEAN
encryption_spec_key_name:
defaultValue: ''
description: The KMS key name.
isOptional: true
parameterType: STRING
evaluation_batch_explain_machine_type:
defaultValue: n1-highmem-8
description: 'The prediction server machine type
for batch explain components during evaluation.'
isOptional: true
parameterType: STRING
evaluation_batch_explain_max_replica_count:
defaultValue: 10.0
description: 'The max number of prediction
servers for batch explain components during evaluation.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_batch_explain_starting_replica_count:
defaultValue: 10.0
description: 'The initial number of
prediction servers for batch explain components during evaluation.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_batch_predict_machine_type:
defaultValue: n1-highmem-8
description: 'The prediction server machine type
for batch predict components during evaluation.'
isOptional: true
parameterType: STRING
evaluation_batch_predict_max_replica_count:
defaultValue: 20.0
description: 'The max number of prediction
servers for batch predict components during evaluation.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_batch_predict_starting_replica_count:
defaultValue: 20.0
description: 'The initial number of
prediction servers for batch predict components during evaluation.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_dataflow_disk_size_gb:
defaultValue: 50.0
description: 'Dataflow worker''s disk size in GB for
evaluation components.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_dataflow_machine_type:
defaultValue: n1-standard-4
description: 'The dataflow machine type for evaluation
components.'
isOptional: true
parameterType: STRING
evaluation_dataflow_max_num_workers:
defaultValue: 100.0
description: 'The max number of Dataflow workers for
evaluation components.'
isOptional: true
parameterType: NUMBER_INTEGER
evaluation_dataflow_starting_num_workers:
defaultValue: 10.0
description: 'The initial number of Dataflow
workers for evaluation components.'
isOptional: true
parameterType: NUMBER_INTEGER
export_additional_model_without_custom_ops:
defaultValue: false
description: 'Whether to export additional
model without custom TensorFlow operators.'
isOptional: true
parameterType: BOOLEAN
fast_testing:
defaultValue: false
description: Internal flag used for presubmit tests.
isOptional: true
parameterType: BOOLEAN
location:
description: The GCP region that runs the pipeline components.
parameterType: STRING
model_description:
defaultValue: ''
description: The description of the uploaded Vertex model.
isOptional: true
parameterType: STRING
model_display_name:
defaultValue: ''
description: The display name of the uploaded Vertex model.
isOptional: true
parameterType: STRING
optimization_objective:
description: 'For binary classification, "maximize-au-roc",
"minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall",
or
"maximize-recall-at-precision". For multi class classification,
"minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or
"minimize-rmsle".'
parameterType: STRING
optimization_objective_precision_value:
defaultValue: -1.0
description: 'Required when optimization_objective
is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.'
isOptional: true
parameterType: NUMBER_DOUBLE
optimization_objective_recall_value:
defaultValue: -1.0
description: 'Required when optimization_objective is
''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.'
isOptional: true
parameterType: NUMBER_DOUBLE
predefined_split_key:
defaultValue: ''
description: The predefined_split column name.
isOptional: true
parameterType: STRING
prediction_type:
description: 'The type of prediction the model is to produce.
"classification" or "regression".'
parameterType: STRING
project:
description: The GCP project that runs the pipeline components.
parameterType: STRING
quantiles:
description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles
are allowed, with values strictly between 0 and 1. Quantiles must be unique.'
isOptional: true
parameterType: LIST
root_dir:
description: The root GCS directory for the pipeline components.
parameterType: STRING
run_distillation:
defaultValue: false
description: 'Whether the distillation should be applied to the
training.'
isOptional: true
parameterType: BOOLEAN
run_evaluation:
defaultValue: false
description: Whether to run evaluation steps during training.
isOptional: true
parameterType: BOOLEAN
stage_1_num_parallel_trials:
defaultValue: 35.0
description: Number of parallel trials for stage 1.
isOptional: true
parameterType: NUMBER_INTEGER
stage_1_tuner_worker_pool_specs_override:
description: 'The dictionary for overriding
stage 1 tuner worker pool spec.'
isOptional: true
parameterType: LIST
stage_1_tuning_result_artifact_uri:
defaultValue: ''
description: 'The stage 1 tuning result artifact GCS
URI.'
isOptional: true
parameterType: STRING
stage_2_num_parallel_trials:
defaultValue: 35.0
description: Number of parallel trials for stage 2.
isOptional: true
parameterType: NUMBER_INTEGER
stage_2_num_selected_trials:
defaultValue: 5.0
description: Number of selected trials for stage 2.
isOptional: true
parameterType: NUMBER_INTEGER
stats_and_example_gen_dataflow_disk_size_gb:
defaultValue: 40.0
description: 'Dataflow worker''s disk size in
GB for stats_and_example_gen component.'
isOptional: true
parameterType: NUMBER_INTEGER
stats_and_example_gen_dataflow_machine_type:
defaultValue: n1-standard-16
description: 'The dataflow machine type for
stats_and_example_gen component.'
isOptional: true
parameterType: STRING
stats_and_example_gen_dataflow_max_num_workers:
defaultValue: 25.0
description: 'The max number of Dataflow
workers for stats_and_example_gen component.'
isOptional: true
parameterType: NUMBER_INTEGER
stratified_split_key:
defaultValue: ''
description: The stratified_split column name.
isOptional: true
parameterType: STRING
study_spec_parameters_override:
description: The list for overriding study spec.
isOptional: true
parameterType: LIST
target_column:
description: The target column name.
parameterType: STRING
test_fraction:
defaultValue: -1.0
description: The test fraction.
isOptional: true
parameterType: NUMBER_DOUBLE
timestamp_split_key:
defaultValue: ''
description: The timestamp_split column name.
isOptional: true
parameterType: STRING
train_budget_milli_node_hours:
description: 'The train budget for creating this model,
expressed in milli node hours, i.e. a value of 1,000 in this field means 1 node
hour.'
parameterType: NUMBER_DOUBLE
training_fraction:
defaultValue: -1.0
description: The training fraction.
isOptional: true
parameterType: NUMBER_DOUBLE
transform_dataflow_disk_size_gb:
defaultValue: 40.0
description: 'Dataflow worker''s disk size in GB for
transform component.'
isOptional: true
parameterType: NUMBER_INTEGER
transform_dataflow_machine_type:
defaultValue: n1-standard-16
description: 'The dataflow machine type for transform
component.'
isOptional: true
parameterType: STRING
transform_dataflow_max_num_workers:
defaultValue: 25.0
description: 'The max number of Dataflow workers for
transform component.'
isOptional: true
parameterType: NUMBER_INTEGER
transformations:
description: 'The path to a GCS file containing the transformations to
apply.'
parameterType: STRING
validation_fraction:
defaultValue: -1.0
description: The validation fraction.
isOptional: true
parameterType: NUMBER_DOUBLE
weight_column:
defaultValue: ''
description: The weight column name.
isOptional: true
parameterType: STRING
outputDefinitions:
artifacts:
feature-attribution-2-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
feature-attribution-3-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
feature-attribution-feature_attributions:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-2-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-3-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
model-evaluation-evaluation_metrics:
artifactType:
schemaTitle: system.Metrics
schemaVersion: 0.0.1
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.17
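# Illustrative note (not part of the compiled spec): a minimal sketch of
# submitting this compiled template with the google-cloud-aiplatform SDK.
# Project, bucket, and column values below are placeholders, only a subset of
# parameters is shown, and the pipeline's artifact inputs (vertex_dataset,
# parent_model) are omitted and must be wired up separately.
#
#   from google.cloud import aiplatform
#
#   aiplatform.init(project="my-project", location="us-central1")
#   job = aiplatform.PipelineJob(
#       display_name="automl-tabular",
#       template_path="automl-tabular.yaml",
#       pipeline_root="gs://my-bucket/pipeline_root",
#       parameter_values={
#           "project": "my-project",
#           "location": "us-central1",
#           "root_dir": "gs://my-bucket/automl_tabular",
#           "target_column": "label",
#           "prediction_type": "classification",
#           "optimization_objective": "maximize-au-roc",
#           "transformations": "gs://my-bucket/transformations.json",
#           "train_budget_milli_node_hours": 1000,
#           "data_source_csv_filenames": "gs://my-bucket/train.csv",
#       },
#   )
#   job.run()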