Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import datetime
import json
import os
import sys
import requests
import urllib3
import boto3
import uptime
# Silence urllib3's InsecureRequestWarning; the sessions endpoint below is
# queried with certificate verification disabled (verify=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Minutes of idleness after which a one-time Slack notification is sent.
IDLE_TIME_MINUTES = 30
# Minutes of idleness after which the notebook instance is stopped.
STOP_TIME_MINUTES = 60
# Region interpolated into the notebook / AWS console URLs of the Slack messages.
AWS_REGION = 'ap-northeast-1'
# Slack incoming-webhook URL (placeholder value; replace before deploying).
SLACK_WEBHOOK_URL = 'https://hooks.slack.com/services/xxxxxxxxx/xxxxxxxxx/xxxxxxxxxxxxxxxxxxxx'
# Local HTTPS endpoint queried for the list of sessions and their kernel state.
SESSION_ENDPOINT = 'https://localhost:8443/api/sessions'
# JSON file from which the instance's 'ResourceName' is read.
METADATA_PATH = '/opt/ml/metadata/resource-metadata.json'
# State file holding the latest known activity timestamp between runs.
LAST_ACTIVITY_PATH = '/tmp/last_activity_timestamp.txt'
# Marker file whose existence means the idle notification was already sent.
NOTIFICATION_FLAG_PATH = '/tmp/idle_notification.txt'
# Format used both to parse kernel 'last_activity' values and to persist the
# state file.
# NOTE(review): the trailing literal is a lowercase 'z' -- confirm it matches
# the suffix of the timestamps returned by the sessions API.
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fz'
def get_metadata():
    """Load the resource metadata JSON stored at ``METADATA_PATH``.

    Loading is best-effort: when the file is missing, unreadable or does not
    contain valid JSON, the error is written to stderr and an empty dict is
    returned so callers can simply test for the keys they need.

    Returns:
        The decoded JSON content, or ``{}`` when it could not be loaded.
    """
    try:
        with open(METADATA_PATH) as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        # `file=` is required: a positional sys.stderr would just be printed
        # to stdout as a second value.
        print(e, file=sys.stderr)
    return {}
def get_last_activity():
    """Return the last recorded activity time as a naive ``datetime``.

    The timestamp is read from ``LAST_ACTIVITY_PATH``. When that file does not
    exist yet, or its content cannot be parsed (e.g. it is empty or was
    truncated), the state is re-seeded from the machine's boot time and
    written back through ``set_last_activity``.

    Returns:
        The stored timestamp, or the boot time (falling back to the current
        time if the boot time cannot be determined).
    """
    try:
        with open(LAST_ACTIVITY_PATH) as f:
            return datetime.datetime.strptime(f.read().strip(), TIMESTAMP_FORMAT)
    except FileNotFoundError:
        # No state recorded yet: seed it below.
        pass
    except ValueError as e:
        # A corrupt state file would otherwise make every subsequent run
        # crash; report it and rebuild the state instead.
        print(e, file=sys.stderr)

    last_activity = uptime.boottime()
    if last_activity is None:
        # uptime.boottime() returns None when the boot time is unavailable.
        last_activity = datetime.datetime.now()
    set_last_activity(last_activity)
    return last_activity
def set_last_activity(last_activity):
    """Persist ``last_activity`` to ``LAST_ACTIVITY_PATH``.

    The file is overwritten with the datetime rendered using
    ``TIMESTAMP_FORMAT``.

    Args:
        last_activity: The ``datetime`` to record.
    """
    with open(LAST_ACTIVITY_PATH, 'w') as state_file:
        stamp = last_activity.strftime(TIMESTAMP_FORMAT)
        state_file.write(stamp)
def check_idle_status():
    """Report whether every known kernel is idle and refresh the stored timestamp.

    Sessions that expose a kernel ``execution_state`` are inspected: an
    ``'idle'`` kernel contributes its ``last_activity`` time, any other state
    marks the notebook as in use. The newest of the previously stored
    timestamp and the idle kernels' timestamps is then written back via
    ``set_last_activity``.

    Returns:
        ``False`` if at least one inspected kernel is not idle, else ``True``
        (including when there are no sessions at all).
    """
    current_sessions = get_sessions()
    candidates = [get_last_activity()]
    all_idle = True

    for entry in current_sessions:
        # Skip entries that carry no kernel state information.
        if 'kernel' not in entry:
            continue
        kernel = entry['kernel']
        if 'execution_state' not in kernel:
            continue

        if kernel['execution_state'] != 'idle':
            all_idle = False
            continue

        candidates.append(
            datetime.datetime.strptime(kernel['last_activity'], TIMESTAMP_FORMAT)
        )

    set_last_activity(max(candidates))
    return all_idle
def get_sessions():
    """Fetch the current sessions from ``SESSION_ENDPOINT``.

    The lookup is best-effort: connection problems, timeouts and bodies that
    are not valid JSON are written to stderr and an empty list is returned.

    Returns:
        The decoded JSON response body, or ``[]`` on failure.
    """
    try:
        # Certificate verification is disabled for this local HTTPS endpoint.
        # The timeout keeps an unresponsive server from blocking the script
        # indefinitely (requests waits forever by default).
        response = requests.get(SESSION_ENDPOINT, verify=False, timeout=10)
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # `file=` is required: a positional sys.stderr would just be printed
        # to stdout as a second value.
        print(e, file=sys.stderr)
    return []
def get_idle_time():
    """Return the number of whole seconds elapsed since the last recorded activity.

    Returns:
        Elapsed seconds as an ``int``. The value is negative when the stored
        timestamp lies in the future relative to ``datetime.now()``.
    """
    last_activity = get_last_activity()
    elapsed = datetime.datetime.now() - last_activity
    # timedelta.seconds is only the sub-day component (0..86399) and would
    # wrap around after 24 hours; total_seconds() covers the whole duration.
    return int(elapsed.total_seconds())
def stop_instance():
    """Stop the SageMaker notebook instance named in the resource metadata.

    The instance name is taken from the ``'ResourceName'`` entry returned by
    ``get_metadata``. When that entry is missing nothing can be stopped; this
    is reported on stderr instead of passing silently.
    """
    metadata = get_metadata()
    if 'ResourceName' not in metadata:
        print('ResourceName not found in metadata; instance not stopped',
              file=sys.stderr)
        return

    # Pass the configured region explicitly so the call does not depend on
    # ambient AWS configuration (environment variables / config files).
    client = boto3.client('sagemaker', region_name=AWS_REGION)
    client.stop_notebook_instance(NotebookInstanceName=metadata['ResourceName'])
def notify_idle():
    """Post the "notebook has been idle" message to the Slack webhook.

    The message names the instance (``'ResourceName'`` from the metadata, or
    an empty string when unavailable) and links to the notebook and to its
    AWS console page.

    Returns:
        The HTTP status code of the webhook response, or ``None`` when the
        request could not be completed (the error is written to stderr).
    """
    metadata = get_metadata()
    resource_name = metadata['ResourceName'] if 'ResourceName' in metadata else ''
    message = f"""{IDLE_TIME_MINUTES}分以上アイドル状態が続いているNotebookがあります。
インスタンス名: *{resource_name}*
Notebook URL: https://{resource_name}.notebook.{AWS_REGION}.sagemaker.aws/lab
URL: https://{AWS_REGION}.console.aws.amazon.com/sagemaker/home?region={AWS_REGION}#/notebook-instances/{resource_name}
"""
    try:
        # Notification is best-effort: bound the wait and do not let a Slack
        # outage abort the rest of the run.
        response = requests.post(
            SLACK_WEBHOOK_URL,
            data=json.dumps({'text': message}),
            timeout=10,
        )
    except requests.RequestException as e:
        print(e, file=sys.stderr)
        return None
    return response.status_code
def notify_stop():
    """Post the "instance is being stopped" message to the Slack webhook.

    The message names the instance (``'ResourceName'`` from the metadata, or
    an empty string when unavailable) and links to its AWS console page.
    Delivery is best-effort: a failed request is written to stderr and
    otherwise ignored, so it cannot prevent the caller from going on to stop
    the instance.
    """
    metadata = get_metadata()
    resource_name = metadata['ResourceName'] if 'ResourceName' in metadata else ''
    message = f""":warning: {STOP_TIME_MINUTES}分以上アイドル状態が続いたのでインスタンスを停止します。 :warning:
インスタンス名: *{resource_name}*
AWS Console URL: https://{AWS_REGION}.console.aws.amazon.com/sagemaker/home?region={AWS_REGION}#/notebook-instances/{resource_name}
"""
    try:
        # Bound the wait so an unresponsive webhook cannot hang the script.
        requests.post(
            SLACK_WEBHOOK_URL,
            data=json.dumps({'text': message}),
            timeout=10,
        )
    except requests.RequestException as e:
        print(e, file=sys.stderr)
# Script body: evaluate the idle state once per invocation.
# Presumably the script is run periodically (e.g. from cron) -- TODO confirm.
# NOTE(review): idle time compares datetime.now() (local time) with the stored
# timestamps; confirm both are expressed in the same time zone on the host.
is_idle = check_idle_status()
if is_idle:
    # Take a single snapshot so both thresholds are judged on the same value.
    idle_seconds = (datetime.datetime.now() - get_last_activity()).total_seconds()

    if idle_seconds >= IDLE_TIME_MINUTES * 60:
        # The flag file makes the idle notification a one-time message per
        # idle period; it is only written once Slack accepted the message.
        if not os.path.isfile(NOTIFICATION_FLAG_PATH):
            status_code = notify_idle()
            if status_code == 200:
                with open(NOTIFICATION_FLAG_PATH, mode='w') as f:
                    f.write('true')

    if idle_seconds >= STOP_TIME_MINUTES * 60:
        try:
            notify_stop()
        finally:
            # The stop must be attempted even if the notification fails.
            stop_instance()
else:
    # Activity detected: re-arm the idle notification for the next idle period.
    if os.path.isfile(NOTIFICATION_FLAG_PATH):
        os.remove(NOTIFICATION_FLAG_PATH)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.