Skip to content

Instantly share code, notes, and snippets.

@aclisp
Last active June 14, 2017 12:52
Show Gist options
  • Save aclisp/bf6da022409113ff099f49e6e032c8cf to your computer and use it in GitHub Desktop.
Save aclisp/bf6da022409113ff099f49e6e032c8cf to your computer and use it in GitHub Desktop.
Sends alarms for Kubernetes (k8s) Pods using the Heapster API.
{
"cap": [
{ "app": "__default", "s2s": "friday_alert", "cpu": 250, "mem": 2147483648 },
{ "app": "music-mobsrv", "s2s": "mobsrv", "cpu": 850, "mem": 2147483648 },
{ "app": "music-entsrv", "s2s": "entsrv", "cpu": 850, "mem": 2147483648 },
{ "app": "music-guild-service", "s2s": "guildservice", "cpu": 850, "mem": 2147483648 },
{ "app": "music-entms", "s2s": "entms", "cpu": 850, "mem": 2147483648 },
{ "app": "docker-registry-web", "s2s": "friday_alert", "cpu": 850, "mem": 2147483648 },
{ "app": "music-mobflwapp", "s2s": "mobFlwApp", "cpu": 1000, "mem": 2147483648 },
]
}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import setproctitle # sudo pip install setproctitle
import yaml # sudo pip install PyYAML
import requests # sudo pip install requests
import json
import time
import pprint
import sys
from ctypes import *
import traceback
import os
import datetime
# Set this process's title to "friday_alert".
setproctitle.setproctitle('friday_alert')
# Directory containing this script (symlinks resolved); the SDK library and
# the config file are both located relative to it.
dir_path = os.path.dirname(os.path.realpath(__file__))
# Load the bamSDK shared library that sits next to this script.
bam = cdll.LoadLibrary(dir_path + "/bamSDK.so")
# Initialise the SDK once at start-up.
# NOTE(review): the meaning of the arguments ("friday_alert", 8082, 0, 0) is
# defined by bamSDK and is not visible in this file - confirm against the SDK.
bam.bam_initialize(c_char_p("friday_alert"), c_ushort(8082), c_uint(0), c_uint(0))
pp = pprint.PrettyPrinter(indent=4)
# Alert thresholds, one record per application:
#   app - prefix of podname
#   s2s - name handed to alert() as the "bizname"
#   cpu - unit #cores*1000
#   mem - unit bytes
# NOTE: this in-code list is always replaced by the "cap" list read from
# cap.conf below; if that file cannot be opened, open() raises and the script
# stops here, so the list is never used as a fallback.
CAP = [
    { "app": "__default", "s2s": "friday_alert", "cpu": 850, "mem": 2*1024*1024*1024 },
]
# Read the thresholds from ../conf/cap.conf (relative to this script) and
# print them for reference.
with open(dir_path + "/../conf/cap.conf") as ff:
    CAP = yaml.safe_load(ff)["cap"]
pp.pprint(CAP)
# Given the pod name, returns the cpu and mem numbers beyond which to alert
def get_threshold(cap, podname):
    """Return the threshold record that applies to ``podname``.

    Args:
        cap: Non-empty list of dicts. Each dict has an ``"app"`` key holding
            a pod-name prefix; the remaining keys are returned untouched.
        podname: Name of the pod to look up.

    Returns:
        The first record whose ``"app"`` value is a prefix of ``podname``.
        When nothing matches, the record whose ``"app"`` is exactly
        ``"__default"``; when there is no such record either, the first
        record in ``cap``.

    Raises:
        IndexError: If ``cap`` is empty.
    """
    for record in cap:
        if podname.startswith(record["app"]):
            return record
    # Nothing matched: pick the default record by name rather than by
    # position, so reordering the config cannot silently change the default.
    for record in cap:
        if record["app"] == "__default":
            return record
    # No explicit default configured: keep the historical positional fallback.
    return cap[0]
def pod_ip_img(podname, timeout=10):
    """Look up a pod on the Kubernetes master and return its host IP and image.

    Args:
        podname: Name of the pod in the ``default`` namespace.
        timeout: Seconds to wait for the master API before giving up, so a
            hung API server cannot stall the caller indefinitely.

    Returns:
        A ``(ip, img)`` tuple: ``status.hostIP`` of the pod and the image of
        its first container.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout
            or a non-2xx HTTP status.
        KeyError, IndexError: If the response lacks the expected fields.
    """
    master_url = "http://master.friday.yy.com/api/v1/namespaces/default/pods"
    response = requests.get(master_url + "/" + podname, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing lookup error on the
    # error body returned by the API server.
    response.raise_for_status()
    pod = response.json()
    ip = pod["status"]["hostIP"]
    img = pod["spec"]["containers"][0]["image"]
    return ip, img
def alert(podname, bizname, msg):
    """Report ``msg`` for ``podname`` through the bamSDK error-log watcher.

    The text handed to the SDK is ``msg`` prefixed with a ``<LOCATOR:.../>``
    tag that carries ``bizname``.
    """
    locator_tag = ' <LOCATOR:{"bizname":"' + bizname + '"}/> '
    payload = locator_tag + msg
    bam.bam_error_log_watcher(
        c_int(3),
        c_char_p(podname),
        c_int(1),
        c_char_p(payload),
    )
def nowtime():
    """Return the current local time as hour:minute, month name and day.

    The result ends with a single trailing space so it can be used directly
    as a log-line prefix.
    """
    stamp_format = "%H:%M%B%d "
    current = datetime.datetime.now()
    return current.strftime(stamp_format)
def CORE(cpu):
    """Convert a CPU figure in millicores to a percentage of one core.

    Args:
        cpu: CPU amount in millicores (1000 == one full core).

    Returns:
        The equivalent percentage as a float (1000 -> 100.0).
    """
    # Divide by a float so the result is not truncated by integer division
    # under Python 2; this also matches how GB() divides.
    return cpu / 10.0
def GB(mem):
    """Convert a byte count to gibibytes (1024**3 bytes) as a float."""
    bytes_per_gb = float(1024 ** 3)
    return mem / bytes_per_gb
def IMG(podimg):
    """Return the part of an image reference after its last ``/``.

    A reference without any ``/`` is returned unchanged.
    """
    _head, _sep, short_name = podimg.rpartition('/')
    return short_name
def _latest_metric_value(podurl, podname, metric, timeout):
    """Return the newest sample value of ``metric`` for ``podname``, or None if there are no samples."""
    response = requests.get(podurl + "/" + podname + "/metrics/" + metric, timeout=timeout)
    response.raise_for_status()
    samples = response.json().get("metrics") or []
    if not samples:
        return None
    return samples[-1]["value"]


def process(timeout=10):
    """Scan every pod listed by the local Heapster API and alert on overuse.

    For each pod, the latest CPU usage rate and memory working set are
    compared with the thresholds chosen by ``get_threshold(CAP, podname)``.
    For every limit exceeded, a message is printed and sent through
    ``alert()``.

    A pod with no samples for a metric is not checked against that metric,
    instead of raising and aborting the scan for all remaining pods.

    Args:
        timeout: Seconds to wait for each Heapster request, so a hung
            endpoint cannot block the monitor forever.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout
            or a non-2xx HTTP status from Heapster.
    """
    podurl = "http://127.0.0.1:8082/api/v1/model/namespaces/default/pods"
    listing = requests.get(podurl, timeout=timeout)
    listing.raise_for_status()
    podlist = listing.json()
    for podname in podlist:
        # CPU usage on all cores in millicores
        last1mcpu = _latest_metric_value(podurl, podname, "cpu/usage_rate", timeout)
        # Total working set usage. Working set is the memory being used and
        # not easily dropped by the kernel
        last1mmem = _latest_metric_value(podurl, podname, "memory/working_set", timeout)
        cap = get_threshold(CAP, podname)
        biz = cap["s2s"]
        cpu_over = last1mcpu is not None and last1mcpu > cap["cpu"]
        mem_over = last1mmem is not None and last1mmem > cap["mem"]
        if not (cpu_over or mem_over):
            continue
        # Only query the master for pod details when there is something to report.
        podip, podimg = pod_ip_img(podname)
        if cpu_over:
            msg = "进程CPU告警 {} {:.0f}% > {:.0f}% 镜像 {} POD {}".format(podip, CORE(last1mcpu), CORE(cap["cpu"]), IMG(podimg), podname)
            print(nowtime() + biz + " " + msg)
            alert(podname, biz, msg)
        if mem_over:
            msg = "进程MEM告警 {} {:.1f}G > {:.1f}G 镜像 {} POD {}".format(podip, GB(last1mmem), GB(cap["mem"]), IMG(podimg), podname)
            print(nowtime() + biz + " " + msg)
            alert(podname, biz, msg)
# Poll forever: run one scan, then wait 60 seconds after a clean pass or
# 15 seconds after a failure before trying again.
while True:
    try:
        print("")
        process()
    except Exception:
        # Catch only Exception so KeyboardInterrupt/SystemExit can still stop
        # the script; any other failure is reported and retried.
        print(sys.exc_info())
        traceback.print_tb(sys.exc_info()[2])
        # Flush so the error report is not left sitting in the stdout buffer.
        sys.stdout.flush()
        time.sleep(15)
    else:
        sys.stdout.flush()
        time.sleep(60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment