Created
June 15, 2012 01:42
-
-
Save mleinart/2934172 to your computer and use it in GitHub Desktop.
High frequency batching patch for carbon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py | |
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400 | |
+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400 | |
@@ -32,8 +32,10 @@ | |
MAX_CACHE_SIZE=float('inf'), | |
MAX_UPDATES_PER_SECOND=1000, | |
MAX_CREATES_PER_MINUTE=float('inf'), | |
+ HF_METRICS_PREFIX='', | |
+ HF_METRICS_BATCH_SIZE=100, | |
LINE_RECEIVER_INTERFACE='0.0.0.0', | |
LINE_RECEIVER_PORT=2003, | |
ENABLE_UDP_LISTENER=False, | |
UDP_RECEIVER_INTERFACE='0.0.0.0', | |
UDP_RECEIVER_PORT=2003, | |
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py | |
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400 | |
+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400 | |
@@ -16,6 +16,7 @@ | |
import os | |
import time | |
from os.path import join, exists, dirname, basename | |
+from operator import itemgetter | |
import whisper | |
from carbon import state | |
@@ -43,7 +44,7 @@ | |
metrics = MetricCache.counts() | |
t = time.time() | |
- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending | |
+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending | |
log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t)) | |
if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK: | |
@@ -70,6 +71,11 @@ | |
pass | |
continue | |
+ else: | |
+ if reactor.running: | |
+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX): | |
+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE: | |
+ continue | |
try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store() | |
datapoints = MetricCache.pop(metric) | |
@@ -148,7 +154,7 @@ | |
# Avoid churning CPU when only new metrics are in the cache | |
if not dataWritten: | |
- time.sleep(0.1) | |
+ time.sleep(1) | |
def writeForever(): | |
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example | |
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400 | |
+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400 | |
@@ -56,6 +56,15 @@ | |
# the files quickly but at the risk of slowing I/O down considerably for a while. | |
MAX_CREATES_PER_MINUTE = 50 | |
+# Do not allow small updates for high-frequency metrics. Frequent small updates | |
+# clobber all I/O performance and significantly decrease overall system performance. | |
+# These two settings describe how many datapoints are required for an update to | |
+# be issued (HF_METRICS_BATCH_SIZE) and what metrics are subject to batching | |
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched. | |
+# You can comment out these settings to disable this feature. | |
+HF_METRICS_PREFIX = nodes | |
+HF_METRICS_BATCH_SIZE = 500 | |
+ | |
LINE_RECEIVER_INTERFACE = 0.0.0.0 | |
LINE_RECEIVER_PORT = 2003 | |
michael@saison:~/src/projects/graphite-web$ echo ~/Downloads/*.patch | |
/Users/michael/Downloads/hf-batch.patch | |
michael@saison:~/src/projects/graphite-web$ cat ~/Downloads/hf-batch.patch | |
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py | |
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400 | |
+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400 | |
@@ -32,8 +32,10 @@ | |
MAX_CACHE_SIZE=float('inf'), | |
MAX_UPDATES_PER_SECOND=1000, | |
MAX_CREATES_PER_MINUTE=float('inf'), | |
+ HF_METRICS_PREFIX='', | |
+ HF_METRICS_BATCH_SIZE=100, | |
LINE_RECEIVER_INTERFACE='0.0.0.0', | |
LINE_RECEIVER_PORT=2003, | |
ENABLE_UDP_LISTENER=False, | |
UDP_RECEIVER_INTERFACE='0.0.0.0', | |
UDP_RECEIVER_PORT=2003, | |
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py | |
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400 | |
+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400 | |
@@ -16,6 +16,7 @@ | |
import os | |
import time | |
from os.path import join, exists, dirname, basename | |
+from operator import itemgetter | |
import whisper | |
from carbon import state | |
@@ -43,7 +44,7 @@ | |
metrics = MetricCache.counts() | |
t = time.time() | |
- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending | |
+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending | |
log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t)) | |
if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK: | |
@@ -70,6 +71,11 @@ | |
pass | |
continue | |
+ else: | |
+ if reactor.running: | |
+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX): | |
+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE: | |
+ continue | |
try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store() | |
datapoints = MetricCache.pop(metric) | |
@@ -148,7 +154,7 @@ | |
# Avoid churning CPU when only new metrics are in the cache | |
if not dataWritten: | |
- time.sleep(0.1) | |
+ time.sleep(1) | |
def writeForever(): | |
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example | |
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400 | |
+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400 | |
@@ -56,6 +56,15 @@ | |
# the files quickly but at the risk of slowing I/O down considerably for a while. | |
MAX_CREATES_PER_MINUTE = 50 | |
+# Do not allow small updates for high-frequency metrics. Frequent small updates | |
+# clobber all I/O performance and significantly decrease overall system performance. | |
+# These two settings describe how many datapoints are required for an update to | |
+# be issued (HF_METRICS_BATCH_SIZE) and what metrics are subject to batching | |
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched. | |
+# You can comment out these settings to disable this feature. | |
+HF_METRICS_PREFIX = nodes | |
+HF_METRICS_BATCH_SIZE = 500 | |
+ | |
LINE_RECEIVER_INTERFACE = 0.0.0.0 | |
LINE_RECEIVER_PORT = 2003 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment