Last active
March 4, 2024 19:29
-
-
Save why-not/1522a6132261d12a24fd1cb0b3b542a3 to your computer and use it in GitHub Desktop.
Automatically stops an AWS machine when no users are actively using it via ssh and the CPU load avg over the past 5 mins is less than THRESHOLD. This avoids getting billed for the machine when it is not in use (except, of course, your storage will still be billed unless you delete everything and decommission those as well, which this scri…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# shell commands being automated. | |
# w | |
# aws ec2 stop-instances --instance-id INSTANCE_ID | |
""" | |
The script is the easy part; installing it into the unfriendly (imo) cron system and
making sure all the permissions and paths are set correctly is another issue altogether.
cron will default to running as root, so your script will fail because it is not
running in the correct python environment; root also might not have the paths for
commands like 'python'. So it is best to run this in the user space where you are
already testing this script. Here is a command that edits the current user's crontab
rather than doing it for root.
sudo crontab -u ec2-user -e | |
Then add a line like this there. Notice how you have to activate the python project | |
you are in. Also notice I had to say the whole path to activate. Hopefully after | |
this it should just work. | |
* * * * * source /usr/local/bin/activate pytorch && python /home/ec2-user/workspace/code/stop_idle_aws.py > /tmp/stop_status | |
""" | |
import subprocess | |
import pandas as pd | |
import io | |
import psutil | |
# CPU percentage below which the machine counts as idle. The reading is a
# system-wide figure, so on a multi-core machine one fully busy core shows up
# as only a fraction of 100 (e.g. about 10 percent on a 10-core machine).
CPU_IDLE_THRESH = 10
# Seconds a login session may sit idle before its user counts as inactive
# (50 minutes).
USER_IDLE_THRESH = 50 * 60
# EC2 instance that is stopped when both users and CPU are idle.
INSTANCE_ID = "i-0442388bf750d92e8"
def get_machine_idle():
    """
    Report whether the machine's CPU is currently idle.

    Samples CPU utilisation through `psutil.cpu_percent` with a blocking
    10-second interval, prints the measurement, and compares it against
    CPU_IDLE_THRESH.

    Returns:
        bool: True when the sampled CPU percentage is strictly below
        CPU_IDLE_THRESH, False otherwise.
    """
    print("just before psutil call")
    # Averaging over a window smooths out momentary spikes that a single
    # instantaneous reading would pick up.
    sample_window = 10
    cpu_pct = psutil.cpu_percent(interval=sample_window)
    print("machine cpu average for the past 10 seconds is {}".format(cpu_pct))
    is_idle = cpu_pct < CPU_IDLE_THRESH
    return is_idle
def wish_seconds(wish):
    """
    Convert one IDLE value printed by the 'w' command into whole seconds.

    Recognised formats:
        "44.00s"  seconds with hundredths      -> 44
        "5:10"    minutes:seconds              -> 310
        "1:28m"   hours:minutes                -> 5280
        "3days"   whole days (also "1day")     -> 259200

    Args:
        wish: The IDLE field as text. Surrounding whitespace is ignored.

    Returns:
        int: The idle time in seconds (fractions are truncated).

    Raises:
        ValueError: If the value matches none of the formats above.
    """
    text = str(wish).strip()
    try:
        # Checked first: "days" also ends in "s", so the seconds branch
        # must not get a chance to see it.
        if "day" in text:
            return int(text.split("day")[0]) * 86400
        if text.endswith("m"):
            hours, minutes = text[:-1].split(":")
            return int(hours) * 3600 + int(minutes) * 60
        if text.endswith("s"):
            return int(float(text[:-1]))
        minutes, seconds = text.split(":")
        return int(minutes) * 60 + int(seconds)
    except ValueError as err:
        # Covers both a failed int()/float() and a split that did not yield
        # exactly two parts; report the offending value instead.
        raise ValueError(
            "unrecognised idle time from 'w': {!r}".format(wish)
        ) from err
def get_user_shortest_idle():
    """
    Report whether every logged-in session has been idle long enough.

    Runs the 'w' command, reads the IDLE column of each session line,
    converts the values with wish_seconds() and takes the smallest one.
    When no session lines are present the users are treated as idle.

    Returns:
        bool: True when the shortest idle time exceeds USER_IDLE_THRESH
        seconds (or nobody is logged in), False otherwise.

    Raises:
        subprocess.CalledProcessError: If 'w' exits with a non-zero status.
        KeyError: If the output of 'w' has no IDLE column header.
        ValueError: If an IDLE value is in a format wish_seconds() rejects.
    """
    output = subprocess.check_output(['w'], text=True).strip()
    lines = output.splitlines()

    # Line 0 is the uptime summary and line 1 holds the column names; the
    # session rows follow.
    header = lines[1].split() if len(lines) > 1 else []
    if 'IDLE' not in header:
        raise KeyError('IDLE')
    idle_col = header.index('IDLE')

    idle_seconds = []
    for line in lines[2:]:
        # The last column (WHAT) is a command line that may itself contain
        # spaces, so cap the number of splits at the header's column count.
        fields = line.split(None, len(header) - 1)
        if len(fields) <= idle_col:
            continue
        # NOTE(review): if 'w' leaves a column such as FROM blank, the fields
        # shift left and a neighbouring time value is read instead - confirm
        # against the 'w' build on the target machine.
        idle_seconds.append(wish_seconds(fields[idle_col]))

    # With no sessions there is no idle time to compare, so use a value just
    # past the threshold to mean "users are idle".
    shortest = min(idle_seconds) if idle_seconds else USER_IDLE_THRESH + 1
    print("shortest: ", shortest)
    return shortest > USER_IDLE_THRESH
def main():
    """
    Stop the EC2 instance when both the users and the CPU are idle.

    Queries user idleness (so the machine is not shut down while someone is
    working in a shell) and CPU idleness, prints both results, and only when
    both are idle runs 'aws ec2 stop-instances' for INSTANCE_ID. In every
    other case an explanatory message is printed and the VM is left alone.
    """
    print("Getting user idle..")
    user_idle = get_user_shortest_idle()
    print("Getting mac idle...")
    mac_idle = get_machine_idle()
    print("user_idle: ", user_idle)
    print("mac_idle: ", mac_idle)

    if not user_idle:
        print ("Users are active, leaving the vm alone..")
        return
    if not mac_idle:
        print("Users are idle, but machine is busy, leaving the vm alone..")
        return

    print ("Both Users and Machine Seems Idle.., Shutting down!")
    result = subprocess.run(['aws', 'ec2', 'stop-instances', '--instance-ids', INSTANCE_ID])
    # A failed stop request would otherwise go unnoticed while the instance
    # keeps running (and billing), so record it in the status output.
    if result.returncode != 0:
        print("aws ec2 stop-instances failed with exit code {}".format(result.returncode))
# Run only when executed as a script, so that importing this module can never
# trigger an instance stop as a side effect.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment