Skip to content

Instantly share code, notes, and snippets.

@piraka9011
Created April 20, 2023 04:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save piraka9011/0ad79a2d25ab1dd51f88c244e3134ab9 to your computer and use it in GitHub Desktop.
Save piraka9011/0ad79a2d25ab1dd51f88c244e3134ab9 to your computer and use it in GitHub Desktop.
Start or create an EC2 instance (used as a celery task)
def start_or_create_ec2_instance(self, instance_id=None, region_name="us-west-2", **kwargs):
    """Start an existing EC2 instance, or launch new ones when no id is given.

    Written to run as a bound task: ``self`` must provide ``retry(exc=...)``
    (presumably a Celery task created with ``bind=True`` -- confirm against
    the task registration, which is not part of this function).

    Args:
        self: The bound task instance; only ``self.retry`` is used.
        instance_id: Id of an existing instance to start. When ``None`` or
            ``""`` new instances are created instead.
        region_name: AWS region used for the boto3 session.
        **kwargs: Optional overrides used only when creating instances:
            ``image_id``, ``instance_type``, ``instance_count``,
            ``security_group_id``, ``subnet_id``, ``key_name``,
            ``DeviceName`` and ``VolumeSize``.

    Raises:
        KeyError: If ``region_name`` has no entry in the built-in security
            group / subnet maps and the matching id was not passed in
            ``kwargs``.
        botocore.exceptions.ClientError: For any AWS error other than
            ``InsufficientInstanceCapacity``, which triggers ``self.retry``.
    """
    session = boto3.session.Session(region_name=region_name)

    if instance_id is None or instance_id == "":
        # Default to open SSH security groups
        sg_id_region_map = {
            "us-west-2": "sg-xxxx",
            "us-east-1": "sg-xxxx",
        }
        # Default to public subnets
        subnet_id_region_map = {
            "us-west-2": ["subnet-xxxx", "subnet-xxxx"],
            "us-east-1": [
                "subnet-xxxx",  # us-east-1a
                "subnet-xxxx",  # us-east-1b
                "subnet-xxxx",  # us-east-1c
                "subnet-xxxx",  # us-east-1d
            ],
        }

        image_id = kwargs.get("image_id")
        if not image_id:
            # Use the AL2 GPU optimized image for ECS by default
            ssm = session.client("ssm")
            ecs_gpu_param = ssm.get_parameter(
                Name="/aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended"
            )
            image_id = json.loads(ecs_gpu_param["Parameter"]["Value"])["image_id"]

        # We usually want the largest instance when running this task since they're usually out of capacity
        instance_type = kwargs.get("instance_type", "p4d.24xlarge")
        instance_count = kwargs.get("instance_count", 1)

        # Resolve the region defaults lazily: only consult the maps when the
        # caller did not provide a value, so regions missing from the maps
        # still work as long as explicit ids are supplied.
        security_group_id = kwargs.get("security_group_id")
        if security_group_id is None:
            security_group_id = sg_id_region_map[region_name]
        subnet_id = kwargs.get("subnet_id")
        if subnet_id is None:
            subnet_id = random.choice(subnet_id_region_map[region_name])

        logger.info(
            f"Attempting to create an instance with the following properties:\n"
            f"Region: {region_name}\nImageId: {image_id}\nInstanceType: {instance_type}\n"
            f"Count: {instance_count}\nSecurityGroupId: {security_group_id}\nSubnetId: {subnet_id}"
        )
        ec2 = session.resource("ec2")
        try:
            instances = ec2.create_instances(
                ImageId=image_id,
                InstanceType=instance_type,
                KeyName=kwargs.get("key_name", "tarteel-bastion"),
                MinCount=instance_count,
                MaxCount=instance_count,
                SecurityGroupIds=[security_group_id],
                SubnetId=subnet_id,
                BlockDeviceMappings=[
                    {
                        "DeviceName": kwargs.get("DeviceName", "/dev/xvda"),
                        "Ebs": {"VolumeSize": kwargs.get("VolumeSize", 64)},
                    }
                ],
            )
        except botocore.exceptions.ClientError as error:
            if error.response["Error"]["Code"] == "InsufficientInstanceCapacity":
                # Capacity shortages are transient; hand the task back for a retry.
                raise self.retry(exc=error)
            raise
        else:
            logger.info(f"Started EC2 instance: {instances}")
    else:
        logger.info(f"Attempting to start instance with id: {instance_id}")
        ec2 = session.client("ec2")
        try:
            instances = ec2.start_instances(InstanceIds=[instance_id])
        except botocore.exceptions.ClientError as error:
            if error.response["Error"]["Code"] == "InsufficientInstanceCapacity":
                # Capacity shortages are transient; hand the task back for a retry.
                raise self.retry(exc=error)
            raise
        else:
            logger.info(f"Started instances: {instances}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment