Skip to content

Instantly share code, notes, and snippets.

@mleinart
Created June 15, 2012 01:42
Show Gist options
  • Save mleinart/2934172 to your computer and use it in GitHub Desktop.
Save mleinart/2934172 to your computer and use it in GitHub Desktop.
High frequency batching patch for carbon
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400
+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400
@@ -32,8 +32,10 @@
MAX_CACHE_SIZE=float('inf'),
MAX_UPDATES_PER_SECOND=1000,
MAX_CREATES_PER_MINUTE=float('inf'),
+ HF_METRICS_PREFIX='',
+ HF_METRICS_BATCH_SIZE=100,
LINE_RECEIVER_INTERFACE='0.0.0.0',
LINE_RECEIVER_PORT=2003,
ENABLE_UDP_LISTENER=False,
UDP_RECEIVER_INTERFACE='0.0.0.0',
UDP_RECEIVER_PORT=2003,
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400
+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400
@@ -16,6 +16,7 @@
import os
import time
from os.path import join, exists, dirname, basename
+from operator import itemgetter
import whisper
from carbon import state
@@ -43,7 +44,7 @@
metrics = MetricCache.counts()
t = time.time()
- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))
if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
@@ -70,6 +71,11 @@
pass
continue
+ else:
+ if reactor.running:
+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
+ continue
try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
datapoints = MetricCache.pop(metric)
@@ -148,7 +154,7 @@
# Avoid churning CPU when only new metrics are in the cache
if not dataWritten:
- time.sleep(0.1)
+ time.sleep(1)
def writeForever():
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400
+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400
@@ -56,6 +56,15 @@
# the files quickly but at the risk of slowing I/O down considerably for a while.
MAX_CREATES_PER_MINUTE = 50
+# Do not allow small updates for high-frequency metrics. Frequent small updates
+# clobber I/O performance and significantly decrease overall system performance.
+# These two settings describe how many datapoints are required for an update to
+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
+# You can comment out these settings to disable this feature.
+HF_METRICS_PREFIX = nodes
+HF_METRICS_BATCH_SIZE = 500
+
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2003
michael@saison:~/src/projects/graphite-web$ echo ~/Downloads/*.patch
/Users/michael/Downloads/hf-batch.patch
michael@saison:~/src/projects/graphite-web$ cat ~/Downloads/hf-batch.patch
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400
+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400
@@ -32,8 +32,10 @@
MAX_CACHE_SIZE=float('inf'),
MAX_UPDATES_PER_SECOND=1000,
MAX_CREATES_PER_MINUTE=float('inf'),
+ HF_METRICS_PREFIX='',
+ HF_METRICS_BATCH_SIZE=100,
LINE_RECEIVER_INTERFACE='0.0.0.0',
LINE_RECEIVER_PORT=2003,
ENABLE_UDP_LISTENER=False,
UDP_RECEIVER_INTERFACE='0.0.0.0',
UDP_RECEIVER_PORT=2003,
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400
+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400
@@ -16,6 +16,7 @@
import os
import time
from os.path import join, exists, dirname, basename
+from operator import itemgetter
import whisper
from carbon import state
@@ -43,7 +44,7 @@
metrics = MetricCache.counts()
t = time.time()
- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))
if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
@@ -70,6 +71,11 @@
pass
continue
+ else:
+ if reactor.running:
+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
+ continue
try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
datapoints = MetricCache.pop(metric)
@@ -148,7 +154,7 @@
# Avoid churning CPU when only new metrics are in the cache
if not dataWritten:
- time.sleep(0.1)
+ time.sleep(1)
def writeForever():
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400
+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400
@@ -56,6 +56,15 @@
# the files quickly but at the risk of slowing I/O down considerably for a while.
MAX_CREATES_PER_MINUTE = 50
+# Do not allow small updates for high-frequency metrics. Frequent small updates
+# clobber I/O performance and significantly decrease overall system performance.
+# These two settings describe how many datapoints are required for an update to
+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
+# You can comment out these settings to disable this feature.
+HF_METRICS_PREFIX = nodes
+HF_METRICS_BATCH_SIZE = 500
+
LINE_RECEIVER_INTERFACE = 0.0.0.0
LINE_RECEIVER_PORT = 2003
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment