Skip to content

Instantly share code, notes, and snippets.

@HumairAK
Created June 7, 2024 17:51
Show Gist options
  • Save HumairAK/3ccfdade7f24e8896a6629e4075d6143 to your computer and use it in GitHub Desktop.
Save HumairAK/3ccfdade7f24e8896a6629e4075d6143 to your computer and use it in GitHub Desktop.
train-no-cache.yaml
# PIPELINE DEFINITION
# Name: tutorial-data-passing-2
# Inputs:
# message: str [Default: 'message']
# Component interfaces: one entry per step, naming the executor that runs it
# (see deploymentSpec.executors) and the typed inputs/outputs it exposes.
components:
  # Interface of the preprocess step.
  comp-preprocess:
    executorLabel: exec-preprocess
    inputDefinitions:
      parameters:
        message:
          parameterType: STRING
    outputDefinitions:
      # Two Dataset artifacts; the train task in root.dag consumes both.
      artifacts:
        output_dataset_one:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
        output_dataset_two_path:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      # One output parameter per parameter type exercised by this sample.
      parameters:
        output_bool_parameter_path:
          parameterType: BOOLEAN
        output_dict_parameter_path:
          parameterType: STRUCT
        output_list_parameter_path:
          parameterType: LIST
        output_parameter_path:
          parameterType: STRING
  # Interface of the train step.
  comp-train:
    executorLabel: exec-train
    inputDefinitions:
      artifacts:
        dataset_one_path:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
        dataset_two:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
      parameters:
        input_bool:
          parameterType: BOOLEAN
        input_dict:
          parameterType: STRUCT
        input_list:
          parameterType: LIST
        message:
          parameterType: STRING
        # Optional; the train() signature declares `num_steps: int = 100`.
        # NOTE(review): the default is serialised as 100.0 although the type
        # is NUMBER_INTEGER -- presumably how the compiler emits numeric
        # defaults; left exactly as generated.
        num_steps:
          defaultValue: 100.0
          isOptional: true
          parameterType: NUMBER_INTEGER
    outputDefinitions:
      artifacts:
        model:
          artifactType:
            schemaTitle: system.Model
            schemaVersion: 0.0.1
# Executor definitions: the container image, command and args used to run each
# component. Both executors share the same three-stage command chain:
#   1. `sh -c <bootstrap>`  ensures pip exists, installs kfp==2.3.0, then
#      re-executes its own remaining arguments via `"$0" "$@"`.
#   2. `sh -ec <launcher>`  writes its $0 (the Python source that follows it in
#      the command list) to a temp dir and runs kfp.dsl.executor_main on it.
#   3. the Python source of the component function itself.
# `args` are appended after the command, so the launcher forwards
# `--executor_input ... --function_to_execute <name>` through "$@".
# The scripts are block literals (`|`) so that line breaks and Python
# indentation are part of the value and cannot be lost to line folding.
deploymentSpec:
  executors:
    exec-preprocess:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - preprocess
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def preprocess(
              # An input parameter of type string.
              message: str,
              # Use Output[T] to get a metadata-rich handle to the output artifact
              # of type `Dataset`.
              output_dataset_one: Output[Dataset],
              # A locally accessible filepath for another output artifact of type
              # `Dataset`.
              output_dataset_two_path: OutputPath('Dataset'),
              # A locally accessible filepath for an output parameter of type string.
              output_parameter_path: OutputPath(str),
              # A locally accessible filepath for an output parameter of type bool.
              output_bool_parameter_path: OutputPath(bool),
              # A locally accessible filepath for an output parameter of type dict.
              output_dict_parameter_path: OutputPath(Dict[str, int]),
              # A locally accessible filepath for an output parameter of type list.
              output_list_parameter_path: OutputPath(List[str]),
          ):
              """Dummy preprocessing step."""

              # Use Dataset.path to access a local file path for writing.
              # One can also use Dataset.uri to access the actual URI file path.
              with open(output_dataset_one.path, 'w') as f:
                  f.write(message)

              # OutputPath is used to just pass the local file path of the output artifact
              # to the function.
              with open(output_dataset_two_path, 'w') as f:
                  f.write(message)

              with open(output_parameter_path, 'w') as f:
                  f.write(message)

              with open(output_bool_parameter_path, 'w') as f:
                  f.write(
                      str(True))  # use either `str()` or `json.dumps()` for bool values.

              import json
              with open(output_dict_parameter_path, 'w') as f:
                  f.write(json.dumps({'A': 1, 'B': 2}))

              with open(output_list_parameter_path, 'w') as f:
                  f.write(json.dumps(['a', 'b', 'c']))
        image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
    exec-train:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - train
        command:
        - sh
        - -c
        - |
          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.3.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && "$0" "$@"
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        - |
          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def train(
              # Use InputPath to get a locally accessible path for the input artifact
              # of type `Dataset`.
              dataset_one_path: InputPath('Dataset'),
              # Use Input[T] to get a metadata-rich handle to the input artifact
              # of type `Dataset`.
              dataset_two: Input[Dataset],
              # An input parameter of type string.
              message: str,
              # Use Output[T] to get a metadata-rich handle to the output artifact
              # of type `Model`.
              model: Output[Model],
              # An input parameter of type bool.
              input_bool: bool,
              # An input parameter of type dict.
              input_dict: Dict[str, int],
              # An input parameter of type List[str].
              input_list: List[str],
              # An input parameter of type int with a default value.
              num_steps: int = 100,
          ):
              """Dummy Training step."""
              with open(dataset_one_path, 'r') as input_file:
                  dataset_one_contents = input_file.read()

              with open(dataset_two.path, 'r') as input_file:
                  dataset_two_contents = input_file.read()
              print("test3")
              line = (f'dataset_one_contents: {dataset_one_contents} || '
                      f'dataset_two_contents: {dataset_two_contents} || '
                      f'message: {message} || '
                      f'input_bool: {input_bool}, type {type(input_bool)} || '
                      f'input_dict: {input_dict}, type {type(input_dict)} || '
                      f'input_list: {input_list}, type {type(input_list)} \n')

              with open(model.path, 'w') as output_file:
                  for i in range(num_steps):
                      output_file.write('Step {}\n{}\n=====\n'.format(i, line))

              # model is an instance of Model artifact, which has a .metadata dictionary
              # to store arbitrary metadata for the output artifact.
              model.metadata['accuracy'] = 0.9
        image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
# Pipeline identity; the name matches the "Name:" line in the file header.
pipelineInfo:
  name: tutorial-data-passing-2
# Root DAG: preprocess runs first, then train consumes every artifact and
# parameter that preprocess produces.
root:
  dag:
    tasks:
      preprocess:
        # NOTE(review): empty cachingOptions (no enableCache flag) --
        # presumably caching is off for this task; confirm against the
        # pipeline-spec semantics.
        cachingOptions: {}
        componentRef:
          name: comp-preprocess
        inputs:
          parameters:
            # Forwarded from the pipeline-level `message` input declared in
            # inputDefinitions below.
            message:
              componentInputParameter: message
        taskInfo:
          name: preprocess
      train:
        # NOTE(review): empty cachingOptions, same as preprocess above.
        cachingOptions: {}
        componentRef:
          name: comp-train
        dependentTasks:
        - preprocess
        inputs:
          # Each train input is wired to the preprocess output named by
          # outputArtifactKey / outputParameterKey.
          artifacts:
            dataset_one_path:
              taskOutputArtifact:
                outputArtifactKey: output_dataset_one
                producerTask: preprocess
            dataset_two:
              taskOutputArtifact:
                outputArtifactKey: output_dataset_two_path
                producerTask: preprocess
          parameters:
            input_bool:
              taskOutputParameter:
                outputParameterKey: output_bool_parameter_path
                producerTask: preprocess
            input_dict:
              taskOutputParameter:
                outputParameterKey: output_dict_parameter_path
                producerTask: preprocess
            input_list:
              taskOutputParameter:
                outputParameterKey: output_list_parameter_path
                producerTask: preprocess
            message:
              taskOutputParameter:
                outputParameterKey: output_parameter_path
                producerTask: preprocess
        taskInfo:
          name: train
  # Pipeline-level inputs; `message` is optional and defaults to the literal
  # string "message".
  inputDefinitions:
    parameters:
      message:
        defaultValue: message
        isOptional: true
        parameterType: STRING
# Version of the pipeline-spec schema this document is written against.
# Quoted so it is unambiguously a string.
schemaVersion: '2.1.0'
# SDK version recorded in this file; the executor bootstrap commands pin the
# same release (kfp==2.3.0).
sdkVersion: kfp-2.3.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment