Following https://gist.github.com/anatolijd/5576536 and https://answers.launchpad.net/graphite/+question/228472, I've limited the combinations (each CARBONLINK_HOSTS item is assigned a list of local carbon-cache instances only) and rearranged the output. Now it is clearly visible that the webapp's consistent hashing get_node(key) does the job…
# python hash_ring_selector2.py | |
DESTINATIONS : ['10.4.0.1:2104:a', '10.4.0.1:2204:b', | |
'10.4.0.2:2104:a', '10.4.0.2:2204:b'] | |
CARBONLINK_HOSTS: ['10.4.0.1:7102:a', '10.4.0.1:7202:b'] | |
result metric carbon-relay webapp | |
[ OK ] some.metric ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] mymetric ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] test.metric2 ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] test.metric3 ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] test.metric4 ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] test.tolik ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] test.natasha.z5poverhu.dura ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] system.loadavg_5min ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] system.loadavg_1min ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] carbon.relays.graphite-1-a.memUsage ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] carbon.relays.graphite-1-a.metricsReceived ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] secondYAxis(carbon.agents.*.pointsPerUpdate) ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] sumSeries(group(carbon.agents.*.creates)) ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] net.iostat.mongo-rfcalaiscache.rks ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
CARBONLINK_HOSTS: ['10.4.0.2:7102:a', '10.4.0.2:7202:b'] | |
result metric carbon-relay webapp | |
[ OK ] test.metric ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.metric1 ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.anatoliy ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.random ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.roma ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.slavik ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] test.valera ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] system.loadavg_15min ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] carbon.relays.graphite-1-a.cpuUsage ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] carbon.agents.graphite-1-b.memUsage ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] carbon.agents.graphite-1-b.cpuUsage ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] carbon.agents.graphite-1-b.metricsReceived ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] group(carbon.agents.*.metricsReceived) ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] stats_cos.ha-69b.ranker.source_cache.load_att ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] chef.es-66b.updated_resources ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] disk.es-63b._opt_elasticsearch_data ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] chef.es-*.updated_resources ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] disk.es-63b._opt_elasticsearch_data.used ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] hosts.es-67b.disk-xvdc.disk_ops.read ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] hosts.es-67b.procs.logstash_agent_3.rss ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] hosts.mon-80b.procs.backup_mon_8220b.cputime ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] www.jsp.anonymous.act.ClickEvent.1MinuteRate ('10.4.0.2', 'a') ('10.4.0.2', 'a') | |
[ OK ] www.jsp.external.act.ResultsAutoAnytime.count ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
# python hash_ring_selector2.py | |
DESTINATIONS : ['10.4.0.1:2104:a', '10.4.0.1:2204:b', '10.4.0.1:2304:c', | |
'10.4.0.2:2104:a', '10.4.0.2:2204:b', '10.4.0.2:2304:c', | |
'10.4.0.3:2104:a', '10.4.0.3:2204:b', '10.4.0.3:2304:c'] | |
CARBONLINK_HOSTS: ['10.4.0.1:7102:a', '10.4.0.1:7202:b', '10.4.0.1:7302:c'] | |
result metric carbon-relay webapp | |
[ OK ] some.metric ('10.4.0.1', 'c') ('10.4.0.1', 'c') | |
[ OK ] mymetric ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] test.valera ('10.4.0.1', 'c') ('10.4.0.1', 'c') | |
[ OK ] test.natasha.z5poverhu.dura ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] system.loadavg_5min ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
[ OK ] carbon.agents.graphite-1-b.metricsReceived ('10.4.0.1', 'c') ('10.4.0.1', 'c') | |
[ OK ] secondYAxis(carbon.agents.*.pointsPerUpdate) ('10.4.0.1', 'b') ('10.4.0.1', 'b') | |
[ OK ] sumSeries(group(carbon.agents.*.creates)) ('10.4.0.1', 'a') ('10.4.0.1', 'a') | |
CARBONLINK_HOSTS: ['10.4.0.2:7102:a', '10.4.0.2:7202:b', '10.4.0.2:7302:c'] | |
result metric carbon-relay webapp | |
[ OK ] test.metric1 ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] test.metric4 ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] test.anatoliy ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] test.random ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] system.loadavg_1min ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] carbon.agents.graphite-1-b.memUsage ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] chef.es-66b.updated_resources ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] disk.es-63b._opt_elasticsearch_data ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] chef.es-*.updated_resources ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] disk.es-63b._opt_elasticsearch_data.used ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
[ OK ] hosts.es-67b.disk-xvdc.disk_ops.read ('10.4.0.2', 'b') ('10.4.0.2', 'b') | |
[ OK ] www.jsp.external.act.ResultsAutoAnytime.count ('10.4.0.2', 'c') ('10.4.0.2', 'c') | |
CARBONLINK_HOSTS: ['10.4.0.3:7102:a', '10.4.0.3:7202:b', '10.4.0.3:7302:c'] | |
result metric carbon-relay webapp | |
[ OK ] test.metric ('10.4.0.3', 'b') ('10.4.0.3', 'b') | |
[ OK ] test.metric2 ('10.4.0.3', 'b') ('10.4.0.3', 'b') | |
[ OK ] test.metric3 ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] test.roma ('10.4.0.3', 'c') ('10.4.0.3', 'c') | |
[ OK ] test.slavik ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] test.tolik ('10.4.0.3', 'c') ('10.4.0.3', 'c') | |
[ OK ] system.loadavg_15min ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] carbon.relays.graphite-1-a.memUsage ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] carbon.relays.graphite-1-a.cpuUsage ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] carbon.relays.graphite-1-a.metricsReceived ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] carbon.agents.graphite-1-b.cpuUsage ('10.4.0.3', 'b') ('10.4.0.3', 'b') | |
[ OK ] group(carbon.agents.*.metricsReceived) ('10.4.0.3', 'c') ('10.4.0.3', 'c') | |
[ OK ] net.iostat.mongo-rfcalaiscache.rks ('10.4.0.3', 'b') ('10.4.0.3', 'b') | |
[ OK ] stats_cos.ha-69b.ranker.source_cache.load_att ('10.4.0.3', 'c') ('10.4.0.3', 'c') | |
[ OK ] hosts.es-67b.procs.logstash_agent_3.rss ('10.4.0.3', 'b') ('10.4.0.3', 'b') | |
[ OK ] hosts.mon-80b.procs.backup_mon_8220b.cputime ('10.4.0.3', 'a') ('10.4.0.3', 'a') | |
[ OK ] www.jsp.anonymous.act.ClickEvent.1MinuteRate ('10.4.0.3', 'a') ('10.4.0.3', 'a') |
#!/bin/python | |
import os,sys | |
sys.path.append('/opt/graphite/webapp') | |
os.environ['DJANGO_SETTINGS_MODULE'] = 'graphite.settings' | |
from graphite.storage import STORE, LOCAL_STORE | |
from graphite.render.hashing import ConsistentHashRing | |
from graphite.render.datalib import CarbonLinkPool | |
# Use CarbonLinkPool class, as it already contains the same routing methods that are used by either carbon-relay or webapp | |
def build_hosts(hosts=None):
    """Parse 'server:port[:instance]' strings into host tuples.

    Args:
        hosts: iterable of strings such as '10.4.0.1:7102:a'. The
            instance suffix is optional. None (the default) is treated
            as an empty list.

    Returns:
        A list of (server, port, instance) tuples, where port is an int
        and instance is None when the string has no third field.

    Raises:
        IndexError: if an entry has no ':port' part.
        ValueError: if the port field is not an integer.
    """
    res = []
    # `hosts or ()` avoids a shared mutable default argument.
    for host in hosts or ():
        parts = host.split(':')
        server = parts[0]
        port = int(parts[1])
        instance = parts[2] if len(parts) > 2 else None
        res.append((server, port, instance))
    return res
def compare_routes(WRITE, READ, metrics=None, verbose=True):
    """Check that the write and read pools route each metric identically.

    For every metric, both pools are asked for a route via
    ``select_host(metric)``. Each route is unpacked as a two-item
    (host, instance) pair. Metrics whose write host differs from the
    read host are skipped, so only routes landing on the same host are
    compared.

    Args:
        WRITE: object exposing ``select_host(metric)``; stands in for
            carbon-relay routing.
        READ: object exposing ``select_host(metric)``; stands in for
            webapp routing.
        metrics: iterable of metric names. None (the default) means no
            metrics.
        verbose: when true, matching routes are printed as well;
            mismatches are always printed.

    Returns:
        True if every compared metric resolved to the same route in
        both pools, False otherwise.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(" result %45s %20s %20s" % ('metric', 'carbon-relay', 'webapp'))
    status = True
    # `metrics or ()` avoids a shared mutable default argument.
    for metric in metrics or ():
        wi = WRITE.select_host(metric)
        ri = READ.select_host(metric)
        (wh, _wc) = wi
        (rh, _rc) = ri
        # compare only matching hosts
        if wh != rh:
            continue
        matched = (wi == ri)
        if not matched:
            status = False
        # Mismatches are always reported; matches only in verbose mode.
        if verbose or not matched:
            print(" [ %4s ] %45s %20s %20s" % ('OK' if matched else 'Fail', metric, wi, ri))
    return status
# Timeout passed as the second argument to CarbonLinkPool.
CARBONLINK_TIMEOUT = 60

# CARBONLINK_HOSTS for 3 nodes: one entry per cluster node, each listing
# only that node's local carbon-cache instances.
CARBONLINK_HOSTS = [
    ['10.4.0.1:7102:a', '10.4.0.1:7202:b', '10.4.0.1:7302:c'],  # 1st node in the cluster
    ['10.4.0.2:7102:a', '10.4.0.2:7202:b', '10.4.0.2:7302:c'],  # 2nd node in the cluster
    ['10.4.0.3:7102:a', '10.4.0.3:7202:b', '10.4.0.3:7302:c'],  # 3rd node in the cluster
]

# All carbon-cache instances. Only DESTINATIONS[0] is used by the script.
DESTINATIONS = [
    ['10.4.0.1:2104:a', '10.4.0.1:2204:b', '10.4.0.1:2304:c',
     '10.4.0.2:2104:a', '10.4.0.2:2204:b', '10.4.0.2:2304:c',
     '10.4.0.3:2104:a', '10.4.0.3:2204:b', '10.4.0.3:2304:c']
    # Alternative 2-node / 2-instance layout:
    # ['10.4.0.1:2104:a','10.4.0.1:2204:b','10.4.0.2:2104:a','10.4.0.2:2204:b']
]
# Metric names (and a few render-target expressions) to route through both
# pools; a varied sample exercises more of the hash ring.
metrics = [
    "some.metric",
    "mymetric",
    "test.metric",
    "test.metric1",
    "test.metric2",
    "test.metric3",
    "test.metric4",
    "test.anatoliy",
    "test.random",
    "test.roma",
    "test.slavik",
    "test.tolik",
    "test.valera",
    "test.natasha.z5poverhu.dura",
    "system.loadavg_15min",
    "system.loadavg_5min",
    "system.loadavg_1min",
    "carbon.relays.graphite-1-a.memUsage",
    "carbon.relays.graphite-1-a.cpuUsage",
    "carbon.relays.graphite-1-a.metricsReceived",
    "carbon.agents.graphite-1-b.memUsage",
    "carbon.agents.graphite-1-b.cpuUsage",
    "carbon.agents.graphite-1-b.metricsReceived",
    "group(carbon.agents.*.metricsReceived)",
    "secondYAxis(carbon.agents.*.pointsPerUpdate)",
    "sumSeries(group(carbon.agents.*.creates))",
    "net.iostat.mongo-rfcalaiscache.rks",
    "stats_cos.ha-69b.ranker.source_cache.load_att",
    "chef.es-66b.updated_resources",
    "disk.es-63b._opt_elasticsearch_data",
    "chef.es-*.updated_resources",
    "disk.es-63b._opt_elasticsearch_data.used",
    "hosts.es-67b.disk-xvdc.disk_ops.read",
    "hosts.es-67b.procs.logstash_agent_3.rss",
    "hosts.mon-80b.procs.backup_mon_8220b.cputime",
    "www.jsp.anonymous.act.ClickEvent.1MinuteRate",
    "www.jsp.external.act.ResultsAutoAnytime.count",
]
# For every metric, emulate ConsistentHashRing.select_host(metric) for
# carbon-relay (WRITE) and webapp (READ).
# Use many different metrics so that the sample is more representative.
WRITE = CarbonLinkPool(build_hosts(DESTINATIONS[0]), CARBONLINK_TIMEOUT)
print("\nDESTINATIONS : %s" % DESTINATIONS[0])

# Emulate the webapp on each node in turn: it queries only the carbon-cache
# instances listed for that node. Looping over CARBONLINK_HOSTS keeps the
# driver in step with the configuration instead of hard-coding indices 0..2.
for node_hosts in CARBONLINK_HOSTS:
    READ = CarbonLinkPool(build_hosts(node_hosts), CARBONLINK_TIMEOUT)
    print("\nCARBONLINK_HOSTS: %s\n" % node_hosts)
    compare_routes(WRITE, READ, metrics, True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
marco-hoyer commentedOct 11, 2013
Hi,
thanks a lot for your work. Could it be that this script was written against an older version of carbon?
I used carbon/master and got an import error for CarbonLinkPool. The following import statement works for me:
from graphite.carbonlink import CarbonLinkPool
greets