Taylor Sheneman Halliax

## gpu-spot-group.yaml
# Auto Scaling group for the GPU spot node group (excerpt).
GPUSpotNodeGroup:
  Type: AWS::AutoScaling::AutoScalingGroup
  Properties:
    # Group name is assembled from the ClusterName and NodeGroupName parameters.
    AutoScalingGroupName: !Sub '${ClusterName}-${NodeGroupName}'
    # Size bounds and starting size are all supplied by template parameters;
    # the trailing notes record the values the author used.
    MinSize: !Ref NodeAutoScalingGroupMinSize  # 0
    MaxSize: !Ref NodeAutoScalingGroupMaxSize  # 10, arbitrarily
    DesiredCapacity: !Ref NodeAutoScalingGroupDesiredSize  # 0
    MixedInstancesPolicy:
      InstancesDistribution:
        # Number of On-Demand instances kept as the base of the group.
        OnDemandBaseCapacity: !Ref OnDemandBaseCapacity  # 0

## launch-template-user-data.yaml
Parameters:
  # ... (other parameters omitted from this excerpt)
  # String parameter holding the argument line for the node bootstrap script.
  BootstrapArgumentsForSpotFleet:
    Type: String
    Description: >-
      Arguments to pass to the bootstrap script.
      See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    # Folded scalar: the two lines below are joined by a single space, giving
    # one --kubelet-extra-args value that sets the node labels
    # (lifecycle=Ec2Spot, nvidia.com/gpu=true,
    # k8s.amazonaws.com/accelerator=nvidia-tesla) and registers the taints
    # (spotInstance=true:PreferNoSchedule, nvidia.com/gpu=true:NoSchedule).
    Default: >-
      --kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,nvidia.com/gpu=true,k8s.amazonaws.com/accelerator=nvidia-tesla
      --register-with-taints=spotInstance=true:PreferNoSchedule,nvidia.com/gpu=true:NoSchedule'
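  # ... (remaining parameters omitted from this excerpt)
# ... (other template sections omitted from this excerpt)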
Resources:

## example-gpu-deployment.yaml
# Deployment named cuda-vector-add, labelled app=cuda-vector-add, asking for
# three replicas.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cuda-vector-add
  labels:
    app: cuda-vector-add
spec:
  replicas: 3
  selector:
    # NOTE(review): no entries follow matchLabels in this excerpt, so as
    # written the key is null. Presumably the full manifest selects
    # app: cuda-vector-add and goes on to define the pod template — confirm
    # against the complete file.
    matchLabels:

## example-gpu-argo-workflow.yaml
# Argo Workflow whose name is generated from the "cuda-vector-add-" prefix and
# whose entrypoint is the "main" template.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: cuda-vector-add-
spec:
  entrypoint: main
  templates:
  - name: main
    # requires this pod to be run on an nvidia.com/gpu labeled node
    # NOTE(review): the selector entries are not visible in this excerpt.
    # Presumably they match the nvidia.com/gpu=true node label set through
    # BootstrapArgumentsForSpotFleet — confirm against the complete file.
    nodeSelector:
GPUSpotNodeGroup:
  Type: AWS::AutoScaling::AutoScalingGroup
  Properties:
    AutoScalingGroupName: !Sub "${ClusterName}-${NodeGroupName}"
    DesiredCapacity: !Ref NodeAutoScalingGroupDesiredSize # 0
    MinSize: !Ref NodeAutoScalingGroupMinSize # 0
    MaxSize: !Ref NodeAutoScalingGroupMaxSize # 10, arbitrarily
    MixedInstancesPolicy:
      InstancesDistribution:
        OnDemandBaseCapacity: !Ref OnDemandBaseCapacity # 0
Parameters:
  ...
  BootstrapArgumentsForSpotFleet:
    Description: Arguments to pass to the bootstrap script. See files/bootstrap.sh in https://github.com/awslabs/amazon-eks-ami
    Type: String
    Default: "--kubelet-extra-args '--node-labels=lifecycle=Ec2Spot,nvidia.com/gpu=true,k8s.amazonaws.com/accelerator=nvidia-tesla
                                    --register-with-taints=spotInstance=true:PreferNoSchedule,nvidia.com/gpu=true:NoSchedule'"
  ...
...
Resources:
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cuda-vector-add
  labels:
    app: cuda-vector-add
spec:
  replicas: 3
  selector:
    matchLabels:
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: cuda-vector-add-
spec:
  entrypoint: main
  templates:
  - name: main
    # requires this pod to be run on an nvidia.com/gpu labeled node
    nodeSelector: