Skip to content

Instantly share code, notes, and snippets.

@mike-callahan
Last active March 18, 2024 19:05
Show Gist options
  • Save mike-callahan/66365dca36b4207ca1ab2eb75941119b to your computer and use it in GitHub Desktop.
Save mike-callahan/66365dca36b4207ca1ab2eb75941119b to your computer and use it in GitHub Desktop.
Terraform example of a GCP managed instance group (MIG) for use with Dynamic Workload Scheduler (DWS) resize requests
# Pulls in the "gpudirect-tcpx" example module from the GoogleCloudPlatform
# professional-services repository. Other resources in this file read its
# `subnetworks` output to attach VM network interfaces.
# NOTE(review): the git source has no `?ref=` pin, so it follows the
# repository's default branch; consider pinning to a tag or commit.
module "gpudirect" {
  source = "git::https://github.com/GoogleCloudPlatform/professional-services.git//examples/gpudirect-tcpx"

  # Placeholder values: replace with the target project and region.
  region     = "us-central1"
  project_id = "PROJECT-ID"
}
# Looks up an existing service account so its email can be referenced
# elsewhere in this configuration.
# The account_id below is a placeholder: replace it with a real account.
data "google_service_account" "sa" {
  account_id = "SERVICE-ACCOUNT@developer.gserviceaccount.com"
}
# Regional instance template for A3 (a3-highgpu-8g, 8x H100) VMs that are
# created by a managed instance group used with DWS resize requests.
resource "google_compute_region_instance_template" "a3_dws" {
  # Instance templates are immutable: any change forces a replacement. A fixed
  # `name` would collide with the old template (which the MIG still uses), so
  # generate a unique name from a prefix and create the replacement first
  # (see the `lifecycle` block at the bottom).
  name_prefix          = "a3-dws-"
  project              = "PROJECT-ID"
  region               = "us-central1"
  description          = "This template is used to create a mig instance that is compatible with DWS resize requests."
  instance_description = "A3 GPU"
  machine_type         = "a3-highgpu-8g"
  can_ip_forward       = false

  # GPU VMs cannot live-migrate, so they are terminated on host maintenance;
  # automatic restart is disabled as well.
  scheduling {
    automatic_restart   = false
    on_host_maintenance = "TERMINATE"
  }

  # Boot disk: Container-Optimized OS on a 960 GB SSD persistent disk.
  disk {
    source_image = "cos-cloud/cos-105-lts"
    auto_delete  = true
    boot         = true
    disk_type    = "pd-ssd"
    disk_size_gb = 960 # numeric argument; was the quoted string "960"
    mode         = "READ_WRITE"
  }

  guest_accelerator {
    type  = "nvidia-h100-80gb"
    count = 8
  }

  # Do not consume any reservation for these VMs.
  reservation_affinity {
    type = "NO_RESERVATION"
  }

  # One gVNIC per subnetwork, all configured identically. The list order is
  # the NIC order on the VM: the management subnetwork first, followed by the
  # four data-plane subnetworks.
  dynamic "network_interface" {
    for_each = [
      module.gpudirect.subnetworks.management.id,
      module.gpudirect.subnetworks.data_plane.one.id,
      module.gpudirect.subnetworks.data_plane.two.id,
      module.gpudirect.subnetworks.data_plane.three.id,
      module.gpudirect.subnetworks.data_plane.four.id,
    ]

    content {
      subnetwork = network_interface.value
      nic_type   = "GVNIC"
      stack_type = "IPV4_ONLY"

      access_config {
        network_tier = "PREMIUM"
      }
    }
  }

  shielded_instance_config {
    enable_vtpm                 = true
    enable_integrity_monitoring = true
  }

  service_account {
    email  = data.google_service_account.sa.email
    scopes = ["cloud-platform"]
  }

  # Create the replacement template before destroying the old one so the
  # instance group manager can be repointed without a failed delete.
  lifecycle {
    create_before_destroy = true
  }
}
# Zonal managed instance group (us-central1-a) whose VMs are built from the
# "a3_dws" regional instance template.
# NOTE(review): `target_size` is not set here; presumably capacity is added
# through DWS resize requests rather than by Terraform. Confirm before
# setting it.
resource "google_compute_instance_group_manager" "a3_dws" {
  project = "PROJECT-ID"
  zone    = "us-central1-a"

  name               = "a3-dws"
  base_instance_name = "a3-dws"

  # Do not make `terraform apply` wait for the group's instances.
  wait_for_instances = false

  version {
    instance_template = google_compute_region_instance_template.a3_dws.self_link
  }

  # Leave failed VMs alone instead of letting the group act on them.
  instance_lifecycle_policy {
    default_action_on_failure = "DO_NOTHING"
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment