-
-
Save Michaelvll/1bfeeb379c355f7fde3baeab1a04f90d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
+ sky launch -y -d -c test-autostop-3446f302-cb --num-nodes 2 examples/minimal.yaml | |
Task from YAML spec: examples/minimal.yaml | |
D 11-22 19:57:00 optimizer.py:232] #### minimal #### | |
D 11-22 19:57:01 optimizer.py:261] Defaulting the task's estimated time to 1 hour. | |
D 11-22 19:57:01 optimizer.py:278] resources: AWS(m6i.2xlarge) | |
D 11-22 19:57:01 optimizer.py:287] estimated_runtime: 3600 s (1.0 hr) | |
D 11-22 19:57:01 optimizer.py:291] estimated_cost (not incl. egress): $0.8 | |
I 11-22 19:57:01 optimizer.py:606] == Optimizer == | |
I 11-22 19:57:01 optimizer.py:629] [1mEstimated cost: [0m$0.8 / hour | |
I 11-22 19:57:01 optimizer.py:629] | |
I 11-22 19:57:01 optimizer.py:685] [1mConsidered resources (2 nodes):[0m | |
I 11-22 19:57:01 optimizer.py:714] ------------------------------------------------------------------ | |
I 11-22 19:57:01 optimizer.py:714] CLOUD INSTANCE vCPUs ACCELERATORS COST ($) CHOSEN | |
I 11-22 19:57:01 optimizer.py:714] ------------------------------------------------------------------ | |
I 11-22 19:57:01 optimizer.py:714] [1mAWS[0m [1mm6i.2xlarge[0m [1m8[0m [1m-[0m [1m0.77[0m [1m[32m ✔[0m[0m | |
I 11-22 19:57:01 optimizer.py:714] ------------------------------------------------------------------ | |
I 11-22 19:57:01 optimizer.py:714] | |
Running task on cluster test-autostop-3446f302-cb... | |
I 11-22 19:57:01 cloud_vm_ray_backend.py:2889] [36mCreating a new cluster: "test-autostop-3446f302-cb" [2x AWS(m6i.2xlarge)].[0m | |
I 11-22 19:57:01 cloud_vm_ray_backend.py:2889] Tip: to reuse an existing cluster, specify --cluster (-c). Run `sky status` to see existing clusters. | |
I 11-22 19:57:01 cloud_vm_ray_backend.py:990] To view detailed progress: [1mtail -n100 -f /home/ubuntu/sky_logs/sky-2022-11-22-19-57-00-496053/provision.log[0m | |
I 11-22 19:57:02 cloud_vm_ray_backend.py:1246] [1mLaunching on AWS us-east-1[0m (us-east-1a,us-east-1b,us-east-1c,us-east-1d,us-east-1e,us-east-1f) | |
I 11-22 19:58:17 log_utils.py:45] [32mHead node is up.[0m | |
D 11-22 19:59:12 cloud_vm_ray_backend.py:1325] Ray up takes 130.46698546409607 seconds with 1 retries. | |
I 11-22 19:59:12 cloud_vm_ray_backend.py:1357] [1mSuccessfully provisioned or found existing head VM. Waiting for workers.[0m | |
D 11-22 19:59:15 backend_utils.py:929] No cluster status. | |
D 11-22 19:59:15 backend_utils.py:929] | |
D 11-22 19:59:26 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 19:59:25.883512 ======== | |
D 11-22 19:59:26 backend_utils.py:929] Node status | |
D 11-22 19:59:26 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:26 backend_utils.py:929] Healthy: | |
D 11-22 19:59:26 backend_utils.py:929] 1 ray.head.default | |
D 11-22 19:59:26 backend_utils.py:929] Pending: | |
D 11-22 19:59:26 backend_utils.py:929] 172.31.85.253: ray.worker.default, waiting-for-ssh | |
D 11-22 19:59:26 backend_utils.py:929] Recent failures: | |
D 11-22 19:59:26 backend_utils.py:929] (no failures) | |
D 11-22 19:59:26 backend_utils.py:929] | |
D 11-22 19:59:26 backend_utils.py:929] Resources | |
D 11-22 19:59:26 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:26 backend_utils.py:929] Usage: | |
D 11-22 19:59:26 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 19:59:26 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 19:59:26 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 19:59:26 backend_utils.py:929] | |
D 11-22 19:59:26 backend_utils.py:929] Demands: | |
D 11-22 19:59:26 backend_utils.py:929] (no resource demands) | |
D 11-22 19:59:26 backend_utils.py:929] | |
D 11-22 19:59:26 backend_utils.py:984] Reset start time, as new nodes are launched. (0 -> 2) | |
D 11-22 19:59:37 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 19:59:36.095543 ======== | |
D 11-22 19:59:37 backend_utils.py:929] Node status | |
D 11-22 19:59:37 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:37 backend_utils.py:929] Healthy: | |
D 11-22 19:59:37 backend_utils.py:929] 1 ray.head.default | |
D 11-22 19:59:37 backend_utils.py:929] Pending: | |
D 11-22 19:59:37 backend_utils.py:929] 172.31.85.253: ray.worker.default, waiting-for-ssh | |
D 11-22 19:59:37 backend_utils.py:929] Recent failures: | |
D 11-22 19:59:37 backend_utils.py:929] (no failures) | |
D 11-22 19:59:37 backend_utils.py:929] | |
D 11-22 19:59:37 backend_utils.py:929] Resources | |
D 11-22 19:59:37 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:37 backend_utils.py:929] Usage: | |
D 11-22 19:59:37 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 19:59:37 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 19:59:37 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 19:59:37 backend_utils.py:929] | |
D 11-22 19:59:37 backend_utils.py:929] Demands: | |
D 11-22 19:59:37 backend_utils.py:929] (no resource demands) | |
D 11-22 19:59:37 backend_utils.py:929] | |
D 11-22 19:59:47 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 19:59:46.274131 ======== | |
D 11-22 19:59:47 backend_utils.py:929] Node status | |
D 11-22 19:59:47 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:47 backend_utils.py:929] Healthy: | |
D 11-22 19:59:47 backend_utils.py:929] 1 ray.head.default | |
D 11-22 19:59:47 backend_utils.py:929] Pending: | |
D 11-22 19:59:47 backend_utils.py:929] 172.31.85.253: ray.worker.default, waiting-for-ssh | |
D 11-22 19:59:47 backend_utils.py:929] Recent failures: | |
D 11-22 19:59:47 backend_utils.py:929] (no failures) | |
D 11-22 19:59:47 backend_utils.py:929] | |
D 11-22 19:59:47 backend_utils.py:929] Resources | |
D 11-22 19:59:47 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:47 backend_utils.py:929] Usage: | |
D 11-22 19:59:47 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 19:59:47 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 19:59:47 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 19:59:47 backend_utils.py:929] | |
D 11-22 19:59:47 backend_utils.py:929] Demands: | |
D 11-22 19:59:47 backend_utils.py:929] (no resource demands) | |
D 11-22 19:59:47 backend_utils.py:929] | |
D 11-22 19:59:58 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 19:59:56.422717 ======== | |
D 11-22 19:59:58 backend_utils.py:929] Node status | |
D 11-22 19:59:58 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:58 backend_utils.py:929] Healthy: | |
D 11-22 19:59:58 backend_utils.py:929] 1 ray.head.default | |
D 11-22 19:59:58 backend_utils.py:929] Pending: | |
D 11-22 19:59:58 backend_utils.py:929] 172.31.85.253: ray.worker.default, waiting-for-ssh | |
D 11-22 19:59:58 backend_utils.py:929] Recent failures: | |
D 11-22 19:59:58 backend_utils.py:929] (no failures) | |
D 11-22 19:59:58 backend_utils.py:929] | |
D 11-22 19:59:58 backend_utils.py:929] Resources | |
D 11-22 19:59:58 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 19:59:58 backend_utils.py:929] Usage: | |
D 11-22 19:59:58 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 19:59:58 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 19:59:58 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 19:59:58 backend_utils.py:929] | |
D 11-22 19:59:58 backend_utils.py:929] Demands: | |
D 11-22 19:59:58 backend_utils.py:929] (no resource demands) | |
D 11-22 19:59:58 backend_utils.py:929] | |
D 11-22 20:00:09 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:00:06.565296 ======== | |
D 11-22 20:00:09 backend_utils.py:929] Node status | |
D 11-22 20:00:09 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:09 backend_utils.py:929] Healthy: | |
D 11-22 20:00:09 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:00:09 backend_utils.py:929] Pending: | |
D 11-22 20:00:09 backend_utils.py:929] 172.31.85.253: ray.worker.default, waiting-for-ssh | |
D 11-22 20:00:09 backend_utils.py:929] Recent failures: | |
D 11-22 20:00:09 backend_utils.py:929] (no failures) | |
D 11-22 20:00:09 backend_utils.py:929] | |
D 11-22 20:00:09 backend_utils.py:929] Resources | |
D 11-22 20:00:09 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:09 backend_utils.py:929] Usage: | |
D 11-22 20:00:09 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 20:00:09 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 20:00:09 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 20:00:09 backend_utils.py:929] | |
D 11-22 20:00:09 backend_utils.py:929] Demands: | |
D 11-22 20:00:09 backend_utils.py:929] (no resource demands) | |
D 11-22 20:00:09 backend_utils.py:929] | |
D 11-22 20:00:20 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:00:16.772394 ======== | |
D 11-22 20:00:20 backend_utils.py:929] Node status | |
D 11-22 20:00:20 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:20 backend_utils.py:929] Healthy: | |
D 11-22 20:00:20 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:00:20 backend_utils.py:929] Pending: | |
D 11-22 20:00:20 backend_utils.py:929] 172.31.85.253: ray.worker.default, setting-up | |
D 11-22 20:00:20 backend_utils.py:929] Recent failures: | |
D 11-22 20:00:20 backend_utils.py:929] (no failures) | |
D 11-22 20:00:20 backend_utils.py:929] | |
D 11-22 20:00:20 backend_utils.py:929] Resources | |
D 11-22 20:00:20 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:20 backend_utils.py:929] Usage: | |
D 11-22 20:00:20 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 20:00:20 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 20:00:20 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 20:00:20 backend_utils.py:929] | |
D 11-22 20:00:20 backend_utils.py:929] Demands: | |
D 11-22 20:00:20 backend_utils.py:929] (no resource demands) | |
D 11-22 20:00:20 backend_utils.py:929] | |
D 11-22 20:00:30 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:00:27.036815 ======== | |
D 11-22 20:00:30 backend_utils.py:929] Node status | |
D 11-22 20:00:30 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:30 backend_utils.py:929] Healthy: | |
D 11-22 20:00:30 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:00:30 backend_utils.py:929] Pending: | |
D 11-22 20:00:30 backend_utils.py:929] 172.31.85.253: ray.worker.default, setting-up | |
D 11-22 20:00:30 backend_utils.py:929] Recent failures: | |
D 11-22 20:00:30 backend_utils.py:929] (no failures) | |
D 11-22 20:00:30 backend_utils.py:929] | |
D 11-22 20:00:30 backend_utils.py:929] Resources | |
D 11-22 20:00:30 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:30 backend_utils.py:929] Usage: | |
D 11-22 20:00:30 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 20:00:30 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 20:00:30 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 20:00:30 backend_utils.py:929] | |
D 11-22 20:00:30 backend_utils.py:929] Demands: | |
D 11-22 20:00:30 backend_utils.py:929] (no resource demands) | |
D 11-22 20:00:30 backend_utils.py:929] | |
D 11-22 20:00:41 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:00:37.223856 ======== | |
D 11-22 20:00:41 backend_utils.py:929] Node status | |
D 11-22 20:00:41 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:41 backend_utils.py:929] Healthy: | |
D 11-22 20:00:41 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:00:41 backend_utils.py:929] Pending: | |
D 11-22 20:00:41 backend_utils.py:929] 172.31.85.253: ray.worker.default, setting-up | |
D 11-22 20:00:41 backend_utils.py:929] Recent failures: | |
D 11-22 20:00:41 backend_utils.py:929] (no failures) | |
D 11-22 20:00:41 backend_utils.py:929] | |
D 11-22 20:00:41 backend_utils.py:929] Resources | |
D 11-22 20:00:41 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:41 backend_utils.py:929] Usage: | |
D 11-22 20:00:41 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 20:00:41 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 20:00:41 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 20:00:41 backend_utils.py:929] | |
D 11-22 20:00:41 backend_utils.py:929] Demands: | |
D 11-22 20:00:41 backend_utils.py:929] (no resource demands) | |
D 11-22 20:00:41 backend_utils.py:929] | |
D 11-22 20:00:52 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:00:47.377288 ======== | |
D 11-22 20:00:52 backend_utils.py:929] Node status | |
D 11-22 20:00:52 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:52 backend_utils.py:929] Healthy: | |
D 11-22 20:00:52 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:00:52 backend_utils.py:929] Pending: | |
D 11-22 20:00:52 backend_utils.py:929] 172.31.85.253: ray.worker.default, setting-up | |
D 11-22 20:00:52 backend_utils.py:929] Recent failures: | |
D 11-22 20:00:52 backend_utils.py:929] (no failures) | |
D 11-22 20:00:52 backend_utils.py:929] | |
D 11-22 20:00:52 backend_utils.py:929] Resources | |
D 11-22 20:00:52 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:00:52 backend_utils.py:929] Usage: | |
D 11-22 20:00:52 backend_utils.py:929] 0.0/8.0 CPU | |
D 11-22 20:00:52 backend_utils.py:929] 0.00/18.153 GiB memory | |
D 11-22 20:00:52 backend_utils.py:929] 0.00/9.076 GiB object_store_memory | |
D 11-22 20:00:52 backend_utils.py:929] | |
D 11-22 20:00:52 backend_utils.py:929] Demands: | |
D 11-22 20:00:52 backend_utils.py:929] (no resource demands) | |
D 11-22 20:00:52 backend_utils.py:929] | |
D 11-22 20:01:03 backend_utils.py:929] ======== Autoscaler status: 2022-11-22 20:01:02.646543 ======== | |
D 11-22 20:01:03 backend_utils.py:929] Node status | |
D 11-22 20:01:03 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:01:03 backend_utils.py:929] Healthy: | |
D 11-22 20:01:03 backend_utils.py:929] 1 ray.worker.default | |
D 11-22 20:01:03 backend_utils.py:929] 1 ray.head.default | |
D 11-22 20:01:03 backend_utils.py:929] Pending: | |
D 11-22 20:01:03 backend_utils.py:929] (no pending nodes) | |
D 11-22 20:01:03 backend_utils.py:929] Recent failures: | |
D 11-22 20:01:03 backend_utils.py:929] (no failures) | |
D 11-22 20:01:03 backend_utils.py:929] | |
D 11-22 20:01:03 backend_utils.py:929] Resources | |
D 11-22 20:01:03 backend_utils.py:929] --------------------------------------------------------------- | |
D 11-22 20:01:03 backend_utils.py:929] Usage: | |
D 11-22 20:01:03 backend_utils.py:929] 0.0/16.0 CPU | |
D 11-22 20:01:03 backend_utils.py:929] 0.00/40.553 GiB memory | |
D 11-22 20:01:03 backend_utils.py:929] 0.00/18.234 GiB object_store_memory | |
D 11-22 20:01:03 backend_utils.py:929] | |
D 11-22 20:01:03 backend_utils.py:929] Demands: | |
D 11-22 20:01:03 backend_utils.py:929] (no resource demands) | |
D 11-22 20:01:03 backend_utils.py:929] | |
I 11-22 20:01:03 cloud_vm_ray_backend.py:1085] [32mSuccessfully provisioned or found existing VMs.[0m | |
I 11-22 20:01:56 cloud_vm_ray_backend.py:2162] [36mRunning setup on 2 nodes.[0m | |
Warning: Permanently added '3.87.69.138' (ECDSA) to the list of known hosts. | |
Warning: Permanently added '107.21.74.168' (ECDSA) to the list of known hosts. | |
running setup | |
running setup | |
I 11-22 20:01:58 cloud_vm_ray_backend.py:2172] [32mSetup completed.[0m | |
D 11-22 20:01:58 cloud_vm_ray_backend.py:2174] Setup took 2.05676531791687 seconds. | |
D 11-22 20:01:59 cloud_vm_ray_backend.py:420] Added Task with options: , num_cpus=0.5, placement_group=pg, placement_group_bundle_index=0 | |
D 11-22 20:01:59 cloud_vm_ray_backend.py:420] Added Task with options: , num_cpus=0.5, placement_group=pg, placement_group_bundle_index=1 | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2237] Job submitted with Job ID: [1m1[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2266] [36mJob ID: [1m1[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2266] To cancel the job: [1msky cancel test-autostop-3446f302-cb 1[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2266] To stream the logs: [1msky logs test-autostop-3446f302-cb 1[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2266] To view the job queue: [1msky queue test-autostop-3446f302-cb[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] [36mCluster name: [1mtest-autostop-3446f302-cb[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] To log into the head VM: [1mssh test-autostop-3446f302-cb[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] To submit a job: [1msky exec test-autostop-3446f302-cb yaml_file[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] To stop the cluster: [1msky stop test-autostop-3446f302-cb[0m | |
I 11-22 20:02:00 cloud_vm_ray_backend.py:2376] To teardown the cluster: [1msky down test-autostop-3446f302-cb[0m | |
Clusters | |
NAME LAUNCHED RESOURCES STATUS AUTOSTOP COMMAND | |
test-stale-job-manual-restart-3446f302-c3 a few secs ago 1x AWS(m6i.2xlarge) UP - sky launch -c test-stale-... | |
test-autostop-3446f302-cb a few secs ago 2x AWS(m6i.2xlarge) UP - sky launch -y -d -c test-... | |
test-huggingface-3446f302-d8 28 secs ago 1x GCP(n1-highmem-8, {'V100': 1}) UP - sky launch -y -c test-hug... | |
test-cancel-gcp-3446f302-7e 30 secs ago 1x GCP(n1-highmem-8, {'V100': 1}) UP - sky launch -c test-cancel... | |
test-tpu-vm-3446f302-c0 59 secs ago 1x GCP(TPU-VM, {'tpu-v2-8': 1}, accelerator_args={'runtime_version': '... UP - sky launch -y -c test-tpu... | |
test-job-queue-3446f302-2e 1 min ago 1x AWS(p2.xlarge, {'K80': 1}) UP - sky exec test-job-queue-3... | |
test-zone-3446f302-3d 1 min ago 1x AWS(m6i.2xlarge) INIT - sky launch -y -c test-zon... | |
test-image-id-dict-with-region-3446f302-20 1 min ago 1x AWS(m6i.2xlarge, image_id={'us-west-1': 'skypilot:gpu-ubuntu-1804'}... INIT - sky launch -y -c test-ima... | |
test-cancel-aws-3446f302-03 2 mins ago 1x AWS(p3.2xlarge, {'V100': 1}) UP - sky launch -c test-cancel... | |
test-large-job-queue-3446f302-2b 2 mins ago 1x GCP(n1-highmem-8) UP - sky exec test-large-job-q... | |
test-gcp-start-stop-3446f302-aa 4 mins ago 2x GCP(n1-highmem-8) INIT - sky launch -y -c test-gcp... | |
test-multi-hostname-3446f302-c6 4 mins ago 2x GCP(n1-highmem-8) INIT - sky launch -y -c test-mul... | |
test-file-mounts-3446f302-29 5 mins ago 2x AWS(m6i.2xlarge) INIT - sky launch -y -c test-fil... | |
Managed spot controller (will be autostopped if idle for 10min) | |
NAME LAUNCHED RESOURCES STATUS AUTOSTOP COMMAND | |
sky-spot-controller-3446f302 a few secs ago 1x AWS(m6i.2xlarge, disk_size=50) UP 10m sky spot launch -n test-s... | |
1 cluster has auto{stop,down} scheduled. Refresh statuses with: sky status --refresh | |
[?25h+ sky autostop -y test-autostop-3446f302-cb -i 1 | |
Scheduling autostop on cluster 'test-autostop-3446f302-cb'...done | |
The cluster will be autostopped after 1 minute of idleness. | |
To cancel the autostop, run: sky autostop test-autostop-3446f302-cb --cancel | |
Scheduling autostop on 1 cluster ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00 | |
+ sky status | grep test-autostop-3446f302-cb | grep "1m" | |
test-autostop-3446f302-cb 17 secs ago 2x AWS(m6i.2xlarge) UP 1m sky launch -y -d -c test-... | |
+ sleep 180 | |
+ sky status --refresh | grep test-autostop-3446f302-cb | grep STOPPED | |
Traceback (most recent call last): | |
File "/home/ubuntu/.conda/envs/sky/bin/sky", line 8, in <module> | |
sys.exit(cli()) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/site-packages/click/core.py", line 1128, in __call__ | |
return self.main(*args, **kwargs) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/site-packages/click/core.py", line 1053, in main | |
rv = self.invoke(ctx) | |
File "/home/ubuntu/skypilot/sky/utils/common_utils.py", line 188, in _record | |
return f(*args, **kwargs) | |
File "/home/ubuntu/skypilot/sky/cli.py", line 995, in invoke | |
return super().invoke(ctx) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/site-packages/click/core.py", line 1659, in invoke | |
return _process_result(sub_ctx.command.invoke(sub_ctx)) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/site-packages/click/core.py", line 1395, in invoke | |
return ctx.invoke(self.callback, **ctx.params) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/site-packages/click/core.py", line 754, in invoke | |
return __callback(*args, **kwargs) | |
File "/home/ubuntu/skypilot/sky/utils/common_utils.py", line 209, in _record | |
return f(*args, **kwargs) | |
File "/home/ubuntu/skypilot/sky/cli.py", line 1389, in status | |
cluster_records = core.status(refresh=refresh) | |
File "/home/ubuntu/skypilot/sky/utils/common_utils.py", line 209, in _record | |
return f(*args, **kwargs) | |
File "/home/ubuntu/skypilot/sky/core.py", line 62, in status | |
cluster_records = backend_utils.get_clusters(include_reserved=True, | |
File "/home/ubuntu/skypilot/sky/backends/backend_utils.py", line 1825, in get_clusters | |
updated_records = subprocess_utils.run_in_parallel( | |
File "/home/ubuntu/skypilot/sky/utils/subprocess_utils.py", line 51, in run_in_parallel | |
return list(p.imap(func, args)) | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/multiprocessing/pool.py", line 873, in next | |
raise value | |
File "/home/ubuntu/.conda/envs/sky/lib/python3.10/multiprocessing/pool.py", line 125, in worker | |
result = (True, func(*args, **kwds)) | |
File "/home/ubuntu/skypilot/sky/backends/backend_utils.py", line 1818, in _refresh_cluster | |
record = _update_cluster_status(cluster_name, | |
File "/home/ubuntu/skypilot/sky/backends/backend_utils.py", line 1718, in _update_cluster_status | |
return _update_cluster_status_no_lock(cluster_name) | |
File "/home/ubuntu/skypilot/sky/backends/backend_utils.py", line 1616, in _update_cluster_status_no_lock | |
external_ips = handle.external_ips(use_cached_ips=False) | |
File "/home/ubuntu/skypilot/sky/backends/cloud_vm_ray_backend.py", line 1700, in external_ips | |
self._update_stable_cluster_ips(max_attempts=max_attempts) | |
File "/home/ubuntu/skypilot/sky/backends/cloud_vm_ray_backend.py", line 1683, in _update_stable_cluster_ips | |
stable_internal_external_ips = [internal_external_ips[0]] + sorted( | |
IndexError: list index out of range | |
[31mFailed[0m. | |
Reason: sky status --refresh | grep test-autostop-3446f302-cb | grep STOPPED | |
Log: less /tmp/autostop-0m23guys.log |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment