Last active
January 11, 2017 08:46
-
-
Save unagi/fd73eeebbd80fc2ec38000cfc195a86b to your computer and use it in GitHub Desktop.
ping for datadog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import subprocess | |
import re | |
import time | |
from checks.network_checks import NetworkCheck, Status | |
class PingCheck(NetworkCheck):
    """Network check that pings each instance's address and emits metrics.

    Metrics are named ``<basename>.<category>`` where ``basename`` is read
    from ``init_config`` (default ``'ping'``):

    * ``rtt``       - histogram of the round-trip time of each successful ping
    * ``loss_cnt``  - counter incremented for every failed ping
    * ``total_cnt`` - counter incremented for every attempted ping

    Every metric is tagged with ``isp:<instance['isp']>`` and
    ``locate:<instance['name']>``.

    ``init_config`` keys read by this check: ``ping_timeout`` (default 4),
    ``check_interval`` (default 15), ``check_times`` (default 1),
    ``ping_interval`` (default 0.1) and ``basename`` (default ``'ping'``).
    """

    def __init__(self, name, init_config, agentConfig, instances):
        """Validate the instances and initialise the loss counter for each.

        Raises:
            Exception: if an instance has no (or an empty) ``isp`` value.
        """
        NetworkCheck.__init__(self, name, init_config, agentConfig, instances)
        for instance in instances:
            if not instance.get('isp'):
                raise Exception("All instances should have a 'isp' parameter")
            # Initialise the loss counter with a zero increment.
            self.__simple_increment(instance, 'loss_cnt', 0)

    def _check(self, instance):
        """Ping ``instance['addr']`` and report the outcome.

        At least one ping is always attempted, up to ``check_times`` in total.
        The loop stops early once the number of failures reaches
        ``check_interval // ping_timeout``.

        Returns:
            ``(Status.UP, "UP")`` or ``(Status.DOWN, "DOWN")`` according to
            the result of the last ping attempted.
        """
        start = time.time()
        ping_timeout = int(self.init_config.get('ping_timeout', 4))
        check_interval = int(self.init_config.get('check_interval', 15))
        # Floor division: a true-division float (e.g. 3.75 on Python 3) would
        # allow one more failed ping than the interval can hold.
        max_retry = check_interval // ping_timeout
        # Clamp to one attempt so that ``ret`` is always bound on return, even
        # when ``check_times`` is configured as 0 or a negative number.
        attempts = max(1, int(self.init_config.get('check_times', 1)))
        ping_interval = float(self.init_config.get('ping_interval', 0.1))

        p = Ping(instance['addr'], ping_timeout)
        failure_count = 0
        for _ in range(attempts):
            # Only the ping itself is guarded, so that errors while emitting
            # metrics are not miscounted as packet loss.
            try:
                p.run()
            except Exception as exc:
                self.log.debug("ping to %s failed: %s", instance['addr'], exc)
                self.__simple_increment(instance, 'loss_cnt')
                failure_count += 1
                ret = (Status.DOWN, "DOWN")
                if failure_count >= max_retry:
                    break
            else:
                self.__simple_histogram(instance, 'rtt', p._rtt)
                time.sleep(ping_interval)
                ret = (Status.UP, "UP")
            finally:
                # Runs for every attempt, including the one that breaks out.
                self.__simple_increment(instance, 'total_cnt')

        elapsed_time = time.time() - start
        self.log.info("name:%s, elapsed_time:%s[sec]",
                      instance['name'], round(elapsed_time, 2))
        return ret

    def __metric_name(self, category):
        """Return the full metric name ``<basename>.<category>``."""
        return '%s.%s' % (self.init_config.get('basename', 'ping'), category)

    def __metric_tags(self, instance):
        """Return the tag list attached to every metric of ``instance``."""
        return ['isp:%s' % instance['isp'],
                'locate:%s' % instance['name']]

    def __simple_increment(self, instance, category, value=1):
        """Increment the ``category`` counter of ``instance`` by ``value``."""
        self.increment(
            self.__metric_name(category),
            value,
            tags=self.__metric_tags(instance)
        )

    def __simple_histogram(self, instance, category, value):
        """Record ``value`` in the ``category`` histogram of ``instance``."""
        self.histogram(
            self.__metric_name(category),
            value,
            tags=self.__metric_tags(instance)
        )

    def report_as_service_check(self, sc_name, status, instance, msg=None):
        """Do nothing: this check does not report a service check.

        NOTE(review): presumably overrides a NetworkCheck hook - confirm.
        """
        pass

    def _create_status_event(self, sc_name, status, msg, instance):
        """Do nothing: this check does not create status events."""
        # TODO 5.3 remove that
        pass


class Ping(object):
    """Send a single ICMP echo with the system ``ping`` command.

    After a successful :meth:`run`, ``_ttl`` holds the integer following
    ``ttl=`` in the command output and ``_rtt`` holds the float following
    ``time=``. Both are ``None`` until the first successful run.
    """

    # Compiled once; raw strings keep ``\d`` from being an invalid escape.
    _TTL_RE = re.compile(r"(?<=ttl=)\d+")
    _RTT_RE = re.compile(r"(?<=time=)[\d.]+")

    def __init__(self, host, timeout):
        """Remember the target ``host`` and the ``timeout`` passed to ``-W``."""
        self._host = host
        self._timeout = timeout
        self._ttl = None
        self._rtt = None

    @staticmethod
    def _to_text(data):
        """Return ``data`` as text, decoding it first when it is bytes."""
        # ``communicate()`` yields bytes on Python 3; regexes and message
        # formatting below need text.
        if isinstance(data, bytes):
            return data.decode('utf-8', 'replace')
        return data

    @classmethod
    def _parse_reply(cls, out):
        """Extract ``(ttl, rtt)`` from ping output.

        Returns ``None`` when either field is missing or is not a valid
        number.
        """
        text = cls._to_text(out)
        ttl = cls._TTL_RE.search(text)
        rtt = cls._RTT_RE.search(text)
        if ttl is None or rtt is None:
            return None
        try:
            return int(ttl.group()), float(rtt.group())
        except ValueError:
            # e.g. "time=1.2.3" matches the pattern but is not a float.
            return None

    def run(self):
        """Ping the host once and store the reply's TTL and round-trip time.

        Raises:
            Exception: if ``ping`` wrote anything to stderr, or if its output
                contains no parseable ``ttl=`` / ``time=`` fields.
        """
        ping = subprocess.Popen(
            ["ping", "-c", "1", "-W", str(self._timeout), self._host],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        out, error = ping.communicate()
        if error:
            raise Exception('[NG]: ServerName->%s, Msg->"%s"'
                            % (self._host, self._to_text(error).rstrip()))

        parsed = self._parse_reply(out)
        if parsed is None:
            raise Exception('[NG]: ServerName->%s, Msg->"%s"'
                            % (self._host, 'cannot connect'))
        self._ttl, self._rtt = parsed
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment