Skip to content

Instantly share code, notes, and snippets.

@unagi
Last active January 11, 2017 08:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save unagi/fd73eeebbd80fc2ec38000cfc195a86b to your computer and use it in GitHub Desktop.
Save unagi/fd73eeebbd80fc2ec38000cfc195a86b to your computer and use it in GitHub Desktop.
ping for datadog
#!/usr/bin/env python
import subprocess
import re
import time
from checks.network_checks import NetworkCheck, Status
class PingCheck(NetworkCheck):
    """Network check that pings each configured instance and reports metrics.

    For every instance the check submits, under the ``basename`` prefix read
    from ``init_config`` (default ``ping``):

    * ``<basename>.rtt``       - histogram of round-trip times of replies
    * ``<basename>.loss_cnt``  - counter of failed pings
    * ``<basename>.total_cnt`` - counter of attempted pings

    Every metric is tagged with ``isp:<instance['isp']>`` and
    ``locate:<instance['name']>``.

    NOTE(review): ``self.init_config``, ``self.log``, ``self.increment`` and
    ``self.histogram`` are not defined in this class; presumably they are
    provided by ``NetworkCheck`` - confirm against the agent version in use.
    """

    def __init__(self, name, init_config, agentConfig, instances):
        """Validate the instances and initialize their loss counters.

        Raises:
            Exception: if any instance has a missing or empty ``isp`` value.
        """
        NetworkCheck.__init__(self, name, init_config, agentConfig, instances)
        for instance in instances:
            if not instance.get('isp'):
                raise Exception("All instances should have a 'isp' parameter")
            # Submit a zero increment up front to initialize the loss counter
            # before any ping has been attempted.
            self.__simple_increment(instance, 'loss_cnt', 0)

    def _check(self, instance):
        """Ping ``instance['addr']`` and return the status of the last ping.

        Settings read from ``init_config`` (with defaults):
        ``ping_timeout`` (4), ``check_interval`` (15), ``check_times`` (1)
        and ``ping_interval`` (0.1).

        Returns:
            ``(Status.UP, "UP")`` if the last ping attempted succeeded,
            otherwise ``(Status.DOWN, "DOWN")``.
        """
        start = time.time()

        # Clamp to sane minimums: a zero timeout would divide by zero below,
        # and zero iterations would leave no status to return.
        ping_timeout = max(1, int(self.init_config.get('ping_timeout', 4)))
        check_times = max(1, int(self.init_config.get('check_times', 1)))
        ping_interval = max(
            0.0, float(self.init_config.get('ping_interval', 0.1)))

        # Floor division keeps the threshold an integer on Python 2 and 3.
        check_interval = int(self.init_config.get('check_interval', 15))
        max_retry = check_interval // ping_timeout

        p = Ping(instance['addr'], ping_timeout)
        failure_count = 0
        ret = (Status.DOWN, "DOWN")  # overwritten by the first ping below
        for _ in range(check_times):
            try:
                p.run()
            except Exception as exc:
                # Only a failed ping counts as loss; KeyboardInterrupt and
                # SystemExit are deliberately left to propagate.
                self.log.debug("ping failed: %s", exc)
                self.__simple_increment(instance, 'loss_cnt')
                failure_count += 1
                ret = (Status.DOWN, "DOWN")
                if failure_count >= max_retry:
                    # Stop early once the failures reach the threshold
                    # derived from check_interval / ping_timeout.
                    break
            else:
                self.__simple_histogram(instance, 'rtt', p._rtt)
                time.sleep(ping_interval)
                ret = (Status.UP, "UP")
            finally:
                # Runs for every attempt, including the one that breaks out.
                self.__simple_increment(instance, 'total_cnt')

        elapsed_time = time.time() - start
        self.log.info("name:%s, elapsed_time:%s[sec]",
                      instance['name'], round(elapsed_time, 2))
        return ret

    def __metric_name(self, category):
        """Return ``<basename>.<category>``; basename defaults to ``ping``."""
        return '%s.%s' % (self.init_config.get('basename', 'ping'), category)

    @staticmethod
    def __metric_tags(instance):
        """Return the tag list attached to every metric of ``instance``."""
        return ['isp:%s' % instance['isp'],
                'locate:%s' % instance['name']]

    def __simple_increment(self, instance, category, value=1):
        """Increment the ``category`` counter of ``instance`` by ``value``."""
        self.increment(
            self.__metric_name(category),
            value,
            tags=self.__metric_tags(instance)
        )

    def __simple_histogram(self, instance, category, value):
        """Record ``value`` in the ``category`` histogram of ``instance``."""
        self.histogram(
            self.__metric_name(category),
            value,
            tags=self.__metric_tags(instance)
        )

    def report_as_service_check(self, sc_name, status, instance, msg=None):
        """Intentionally a no-op: this check submits metrics only."""
        pass

    def _create_status_event(self, sc_name, status, msg, instance):
        """Intentionally a no-op: no status events are created."""
        # TODO 5.3 remove that
        pass
class Ping(object):
    """Send a single ping to a host with the system ``ping`` command.

    After a successful :meth:`run`, ``_ttl`` holds the integer parsed from
    the ``ttl=`` field of the command output and ``_rtt`` the float parsed
    from its ``time=`` field (presumably milliseconds - confirm against the
    local ``ping`` implementation). Both are ``None`` until a ping succeeds.
    """

    # Raw-string patterns compiled once. They only match text that int() and
    # float() accept, so parsing a match can never fail.
    _TTL_RE = re.compile(r"(?<=ttl=)\d+")
    _RTT_RE = re.compile(r"(?<=time=)\d+(?:\.\d+)?")

    def __init__(self, host, timeout):
        """Store the target ``host`` and the ``timeout`` given to ``ping -W``."""
        self._host = host
        self._timeout = timeout
        # Defined up front so the attributes exist before the first run().
        self._ttl = None
        self._rtt = None

    def run(self):
        """Ping the host once and store the parsed ``_ttl`` and ``_rtt``.

        Raises:
            Exception: if ``ping`` writes anything to stderr, or if its
                output contains no ``ttl=`` / ``time=`` fields.
        """
        ping = subprocess.Popen(
            ["ping", "-c", "1", "-W", str(self._timeout), self._host],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text mode: on Python 3 the pipes would otherwise yield bytes,
            # which the str regex patterns cannot search.
            universal_newlines=True
        )
        out, error = ping.communicate()
        if error:
            raise Exception('[NG]: ServerName->%s, Msg->"%s"'
                            % (self._host, error.rstrip()))

        ttl_match = self._TTL_RE.search(out)
        rtt_match = self._RTT_RE.search(out)
        if ttl_match is None or rtt_match is None:
            raise Exception('[NG]: ServerName->%s, Msg->"%s"'
                            % (self._host, 'cannot connect'))

        # Assign both together so a failed parse never leaves a fresh ttl
        # paired with a stale rtt.
        self._ttl = int(ttl_match.group())
        self._rtt = float(rtt_match.group())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment