Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nivleshc/a4a99a5c2bca1747b6da0d7da0e388c1 to your computer and use it in GitHub Desktop.
Save nivleshc/a4a99a5c2bca1747b6da0d7da0e388c1 to your computer and use it in GitHub Desktop.
This is the AWS Step Functions state machine definition that automates the training, building, and deployment of an Amazon SageMaker model.
{
"Comment": "An AWS Step Function State Machine to train, build and deploy an Amazon SageMaker model endpoint",
"StartAt": "Create Training Job",
"States": {
"Create Training Job": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createTrainingJob.sync",
"Parameters": {
"TrainingJobName.$": "$$.Execution.Name",
"ResourceConfig": {
"InstanceCount": 1,
"InstanceType": "ml.m4.xlarge",
"VolumeSizeInGB": 5
},
"HyperParameters": {
"max_depth": "5",
"eta": "0.2",
"gamma": "4",
"min_child_weight": "6",
"silent": "0",
"objective": "multi:softmax",
"num_class": "10",
"num_round": "10"
},
"AlgorithmSpecification": {
"TrainingImage": "544295431143.dkr.ecr.ap-southeast-2.amazonaws.com/xgboost:1",
"TrainingInputMode": "File"
},
"OutputDataConfig": {
"S3OutputPath": "s3://bucketName/data/modelartifacts"
},
"StoppingCondition": {
"MaxRuntimeInSeconds": 86400
},
"RoleArn": "iam-role-arn",
"InputDataConfig": [
{
"ChannelName": "train",
"ContentType": "text/csv",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://bucketName/data/train",
"S3DataDistributionType": "FullyReplicated"
}
}
},
{
"ChannelName": "validation",
"ContentType": "text/csv",
"DataSource": {
"S3DataSource": {
"S3DataType": "S3Prefix",
"S3Uri": "s3://bucketName/data/validation",
"S3DataDistributionType": "FullyReplicated"
}
}
}
]
},
"Retry": [
{
"ErrorEquals": [
"SageMaker.AmazonSageMakerException"
],
"IntervalSeconds": 1,
"MaxAttempts": 1,
"BackoffRate": 1.1
},
{
"ErrorEquals": [
"SageMaker.ResourceLimitExceededException"
],
"IntervalSeconds": 60,
"MaxAttempts": 1,
"BackoffRate": 1
},
{
"ErrorEquals": [
"States.Timeout"
],
"IntervalSeconds": 1,
"MaxAttempts": 1,
"BackoffRate": 1
}
],
"Catch": [
{
"ErrorEquals": [
"States.ALL"
],
"ResultPath": "$.cause",
"Next": "Display Error"
}
],
"Next": "Create Model"
},
"Create Model": {
"Parameters": {
"PrimaryContainer": {
"Image": "544295431143.dkr.ecr.ap-southeast-2.amazonaws.com/xgboost:1",
"Environment": {},
"ModelDataUrl.$": "$.ModelArtifacts.S3ModelArtifacts"
},
"ExecutionRoleArn": "iam-role-arn",
"ModelName.$": "$.TrainingJobName"
},
"Resource": "arn:aws:states:::sagemaker:createModel",
"Type": "Task",
"ResultPath":"$.taskresult",
"Next": "Create Endpoint Config"
},
"Create Endpoint Config": {
"Type": "Task",
"Resource": "arn:aws:states:::sagemaker:createEndpointConfig",
"Parameters":{
"EndpointConfigName.$": "$.TrainingJobName",
"ProductionVariants": [
{
"InitialInstanceCount": 1,
"InstanceType": "ml.t2.medium",
"ModelName.$": "$.TrainingJobName",
"VariantName": "AllTraffic"
}
]
},
"ResultPath":"$.taskresult",
"Next":"Create Endpoint"
},
"Create Endpoint":{
"Type":"Task",
"Resource":"arn:aws:states:::sagemaker:createEndpoint",
"Parameters":{
"EndpointConfigName.$": "$.TrainingJobName",
"EndpointName.$": "$.TrainingJobName"
},
"End": true
},
"Display Error":{
"Type": "Pass",
"Result": "Finished with errors. Please check the individual steps for more information",
"End": true
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment