Created
September 27, 2023 14:48
-
-
Save omerh/9dc5168a7575f7763326006309302e5b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Launch an EC2 p5.48xlarge instance based on the Amazon Linux 2 PyTorch DLAMI.
#
# Every setting below can be overridden from the environment, e.g.
#   REGION=us-east-1 VPC_ID=vpc-0123 SUBNET_ID=subnet-0123 ./script.sh
# When a variable is not set in the environment the default on the right-hand
# side is used (placeholders in <...> must be edited or overridden).

# set -x

# Install the dependencies
# sudo yum install -y jq curl

# Variables
REGION="${REGION:-<REGION>}"
# You can list your VPCs with: aws ec2 describe-vpcs --region $REGION | jq '.[][].VpcId'
VPC_ID="${VPC_ID:-<VPC_ID>}"
# PUBLIC=<true|false>: when true, an Elastic IP is allocated and attached to the
# instance's primary network interface; when false, the private IP is reported.
PUBLIC="${PUBLIC:-true}"

# Instance variable configuration
# Subnet ID. Make sure to check the pool mapping so the chosen AZ has capacity.
SUBNET_ID="${SUBNET_ID:-<SUBNET_ID>}"
# Name of the EC2 key pair used for SSH access.
SSH_KEY="${SSH_KEY:-us-east-1-default}"
# INSTANCE_PROFILE="<INSTANCE_PROFILE>" ## Optional
ROOT_EBS_SIZE="${ROOT_EBS_SIZE:-100}" # You need at least 20 GB for loading DLC
ROOT_EBS_IOPS="${ROOT_EBS_IOPS:-5000}"
ROOT_EBS_THROUGHPUT="${ROOT_EBS_THROUGHPUT:-250}"
# NOTE(review): INSTANCE_COUNT is defined but the launch command uses a fixed
# --count 1; confirm whether multi-instance launches are intended.
INSTANCE_COUNT="${INSTANCE_COUNT:-1}"
# Security group
# Create the "p5" security group in the target VPC; if creation fails
# (typically because it already exists) look up the existing group instead.
if ! SG_ID=$(aws ec2 create-security-group --vpc-id "${VPC_ID}" --region "${REGION}" \
  --group-name p5 --description p5 --output text --query 'GroupId'); then
  # --group-names only resolves groups in the default VPC, so match on
  # group name and VPC ID through filters to support any VPC.
  SG_ID=$(aws ec2 describe-security-groups --region "${REGION}" \
    --filters "Name=group-name,Values=p5" "Name=vpc-id,Values=${VPC_ID}" \
    --query 'SecurityGroups[*].GroupId' --output text)
fi
if [[ -z "${SG_ID}" || "${SG_ID}" == "None" ]]; then
  echo "Failed to create or find security group p5 in VPC ${VPC_ID}" >&2
  exit 1
fi

# Authorize all traffic inbound/outbound to self, requirement for EFA to work properly.
# Output is discarded so that re-runs against already-existing rules stay quiet.
aws ec2 authorize-security-group-egress --group-id "${SG_ID}" --protocol all \
  --source-group "${SG_ID}" --region "${REGION}" > /dev/null 2>&1
aws ec2 authorize-security-group-ingress --group-id "${SG_ID}" --protocol all \
  --source-group "${SG_ID}" --region "${REGION}" > /dev/null 2>&1

# Authorize your IP for SSH.
# -4 forces an IPv4 answer (the rule below appends /32); -f makes HTTP errors
# yield an empty result instead of an error page.
MY_IP=$(curl -4 -f -s ifconfig.co)
if [[ -n "${MY_IP}" ]]; then
  aws ec2 authorize-security-group-ingress --group-id "${SG_ID}" --protocol tcp --port 22 \
    --region "${REGION}" --cidr "${MY_IP}/32" > /dev/null 2>&1
else
  echo "Warning: could not determine public IP, SSH ingress rule was not added" >&2
fi
# Create a cluster placement group named "p5"; if creation fails (typically
# because it already exists) look up the existing group's ID instead.
# --output text is required on both paths so CPG_ID is the bare ID rather than
# a JSON-quoted string.
if ! CPG_ID=$(aws ec2 create-placement-group --region "${REGION}" --group-name p5 \
  --strategy cluster --query 'PlacementGroup.GroupId' --output text); then
  CPG_ID=$(aws ec2 describe-placement-groups --region "${REGION}" --group-names p5 \
    --query 'PlacementGroups[*].GroupId' --output text)
fi
# Get the appropriate AMI: the most recently created, available, Amazon-owned
# image whose name matches the PyTorch 2.0.x Amazon Linux 2 DLAMI pattern.
AMI_ID=$(aws ec2 describe-images --region "${REGION}" --owners amazon \
  --filters 'Name=name,Values=Deep Learning AMI GPU PyTorch 2.0.? (Amazon Linux 2) ????????' \
            'Name=state,Values=available' \
  --query 'reverse(sort_by(Images, &CreationDate))[:1].ImageId' --output text)
# Fail early with a clear message instead of a confusing run-instances error.
if [[ -z "${AMI_ID}" || "${AMI_ID}" == "None" ]]; then
  echo "No matching Deep Learning AMI found in region ${REGION}" >&2
  exit 1
fi
# Optional switches for the run-instances command below
#   IAM profile:
#     --iam-instance-profile ${INSTANCE_PROFILE} \
#   Enhanced monitoring:
#     --monitoring Enabled=true \

# Resolve the AMI's root device name so the block-device mapping resizes the
# root volume instead of attaching an extra volume under a wrong device name.
ROOT_DEVICE_NAME=$(aws ec2 describe-images --region "${REGION}" --image-ids "${AMI_ID}" \
  --query 'Images[0].RootDeviceName' --output text)
if [[ -z "${ROOT_DEVICE_NAME}" || "${ROOT_DEVICE_NAME}" == "None" ]]; then
  echo "Failed to determine root device name for AMI ${AMI_ID}" >&2
  exit 1
fi

# Build the 32 EFA network interface specifications: network card 0 uses
# device index 0, network cards 1..31 all use device index 1.
efa_common="SubnetId=${SUBNET_ID},Groups=${SG_ID},AssociatePublicIpAddress=false,InterfaceType=efa"
efa_interfaces=("DeviceIndex=0,NetworkCardIndex=0,${efa_common}")
for ((card = 1; card <= 31; card++)); do
  efa_interfaces+=("DeviceIndex=1,NetworkCardIndex=${card},${efa_common}")
done

# Launch a single spot p5.48xlarge instance into the cluster placement group
# and capture its instance ID.
if ! INSTANCE_ID=$(aws ec2 run-instances \
  --region "${REGION}" \
  --count 1 \
  --image-id "${AMI_ID}" \
  --instance-type p5.48xlarge \
  --instance-market-options "MarketType=spot" \
  --placement "GroupId=${CPG_ID}" \
  --key-name "${SSH_KEY}" \
  --ebs-optimized \
  --block-device-mappings "DeviceName=${ROOT_DEVICE_NAME},Ebs={VolumeSize=${ROOT_EBS_SIZE},VolumeType=gp3,Iops=${ROOT_EBS_IOPS},Throughput=${ROOT_EBS_THROUGHPUT}}" \
  --network-interfaces "${efa_interfaces[@]}" \
  --query 'Instances[0].InstanceId' --output text); then
  echo "Failed to launch instance, check limits or response error message" >&2
  exit 1
fi
# Block until the instance passes its status checks, then report how to reach it.
echo "Waiting for instance $INSTANCE_ID to start"
if ! aws ec2 wait instance-status-ok --region "${REGION}" --instance-ids "${INSTANCE_ID}"; then
  echo "Instance ${INSTANCE_ID} did not reach status OK" >&2
  exit 1
fi

# Compare PUBLIC as a string rather than executing its value as a command.
if [[ "${PUBLIC}" == "true" ]]; then
  # Select the primary interface explicitly (device index 0 on network card 0)
  # instead of relying on the order in which the API lists the interfaces.
  ENI_ID=$(aws ec2 describe-instances --region "${REGION}" --instance-ids "${INSTANCE_ID}" \
    --query 'Reservations[0].Instances[0].NetworkInterfaces[?Attachment.DeviceIndex==`0` && Attachment.NetworkCardIndex==`0`].NetworkInterfaceId | [0]' \
    --output text)
  EIP_ALLOCATION_ID=$(aws ec2 allocate-address --region "${REGION}" --query 'AllocationId' --output text)
  # Without a successful association the address reported below would not
  # reach the instance, so stop here and say which allocation was left over.
  if ! aws ec2 associate-address --region "${REGION}" --allocation-id "${EIP_ALLOCATION_ID}" \
    --network-interface-id "${ENI_ID}" > /dev/null; then
    echo "Failed to associate Elastic IP ${EIP_ALLOCATION_ID} with interface ${ENI_ID}" >&2
    exit 1
  fi
  TARGET_IP=$(aws ec2 describe-addresses --filters "Name=allocation-id,Values=${EIP_ALLOCATION_ID}" \
    --region "${REGION}" --query 'Addresses[*].PublicIp' --output text)
else
  # The instance-level PrivateIpAddress is the primary private IPv4 address.
  TARGET_IP=$(aws ec2 describe-instances --region "${REGION}" --instance-ids "${INSTANCE_ID}" \
    --query 'Reservations[0].Instances[0].PrivateIpAddress' --output text)
fi
echo "Instance $INSTANCE_ID is ready, To connect ec2-user@$TARGET_IP -i $SSH_KEY.pem"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment