mleinart/hf-batch.patch

## hf-batch.patch
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py      2011-10-05 12:30:07.000000000 +0400
+++ /opt/graphite/lib/carbon/conf.py    2011-11-14 03:25:08.736500223 +0400
@@ -32,8 +32,10 @@
   MAX_CACHE_SIZE=float('inf'),
   MAX_UPDATES_PER_SECOND=1000,
   MAX_CREATES_PER_MINUTE=float('inf'),
+  HF_METRICS_PREFIX='',
+  HF_METRICS_BATCH_SIZE=100,
   LINE_RECEIVER_INTERFACE='0.0.0.0',
   LINE_RECEIVER_PORT=2003,
   ENABLE_UDP_LISTENER=False,
   UDP_RECEIVER_INTERFACE='0.0.0.0',
   UDP_RECEIVER_PORT=2003,
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py    2011-10-05 13:39:22.000000000 +0400
+++ /opt/graphite/lib/carbon/writer.py  2011-11-24 17:38:07.736541606 +0400
@@ -16,6 +16,7 @@
 import os
 import time
 from os.path import join, exists, dirname, basename
+from operator import itemgetter

 import whisper
 from carbon import state
@@ -43,7 +44,7 @@
   metrics = MetricCache.counts()

   t = time.time()
-  metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
+  metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
   log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

   if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
@@ -70,6 +71,11 @@
           pass

         continue
+    else:
+      if reactor.running:
+        if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
+          if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
+            continue

     try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
       datapoints = MetricCache.pop(metric)
@@ -148,7 +154,7 @@

     # Avoid churning CPU when only new metrics are in the cache
     if not dataWritten:
-      time.sleep(0.1)
+      time.sleep(1)


 def writeForever():
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example        2011-10-05 11:17:52.000000000 +0400
+++ /opt/graphite/conf/carbon.conf.example      2012-05-11 11:46:25.902738378 +0400
@@ -56,6 +56,15 @@
 # the files quickly but at the risk of slowing I/O down considerably for a while.
 MAX_CREATES_PER_MINUTE = 50

+# Do not allow small updates for high-frequency metrics. Frequent small updates
+# clobber I/O performance and significantly decrease overall system performance.
+# These two settings describe how many datapoints are required for an update to
+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
+# You can comment out these settings to disable this feature.
+HF_METRICS_PREFIX = nodes
+HF_METRICS_BATCH_SIZE = 500
+
 LINE_RECEIVER_INTERFACE = 0.0.0.0
 LINE_RECEIVER_PORT = 2003

michael@saison:~/src/projects/graphite-web$ echo ~/Downloads/*.patch
/Users/michael/Downloads/hf-batch.patch
michael@saison:~/src/projects/graphite-web$ cat ~/Downloads/hf-batch.patch
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py      2011-10-05 12:30:07.000000000 +0400
+++ /opt/graphite/lib/carbon/conf.py    2011-11-14 03:25:08.736500223 +0400
@@ -32,8 +32,10 @@
   MAX_CACHE_SIZE=float('inf'),
   MAX_UPDATES_PER_SECOND=1000,
   MAX_CREATES_PER_MINUTE=float('inf'),
+  HF_METRICS_PREFIX='',
+  HF_METRICS_BATCH_SIZE=100,
   LINE_RECEIVER_INTERFACE='0.0.0.0',
   LINE_RECEIVER_PORT=2003,
   ENABLE_UDP_LISTENER=False,
   UDP_RECEIVER_INTERFACE='0.0.0.0',
   UDP_RECEIVER_PORT=2003,
diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py    2011-10-05 13:39:22.000000000 +0400
+++ /opt/graphite/lib/carbon/writer.py  2011-11-24 17:38:07.736541606 +0400
@@ -16,6 +16,7 @@
 import os
 import time
 from os.path import join, exists, dirname, basename
+from operator import itemgetter

 import whisper
 from carbon import state
@@ -43,7 +44,7 @@
   metrics = MetricCache.counts()

   t = time.time()
-  metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
+  metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
   log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

   if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
@@ -70,6 +71,11 @@
           pass

         continue
+    else:
+      if reactor.running:
+        if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
+          if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
+            continue

     try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
       datapoints = MetricCache.pop(metric)
@@ -148,7 +154,7 @@

     # Avoid churning CPU when only new metrics are in the cache
     if not dataWritten:
-      time.sleep(0.1)
+      time.sleep(1)


 def writeForever():
diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example        2011-10-05 11:17:52.000000000 +0400
+++ /opt/graphite/conf/carbon.conf.example      2012-05-11 11:46:25.902738378 +0400
@@ -56,6 +56,15 @@
 # the files quickly but at the risk of slowing I/O down considerably for a while.
 MAX_CREATES_PER_MINUTE = 50

+# Do not allow small updates for high-frequency metrics. Frequent small updates
+# clobber I/O performance and significantly decrease overall system performance.
+# These two settings describe how many datapoints are required for an update to
+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
+# You can comment out these settings to disable this feature.
+HF_METRICS_PREFIX = nodes
+HF_METRICS_BATCH_SIZE = 500
+
 LINE_RECEIVER_INTERFACE = 0.0.0.0
 LINE_RECEIVER_PORT = 2003
	diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
	--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400
	+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400
	@@ -32,8 +32,10 @@
	MAX_CACHE_SIZE=float('inf'),
	MAX_UPDATES_PER_SECOND=1000,
	MAX_CREATES_PER_MINUTE=float('inf'),
	+ HF_METRICS_PREFIX='',
	+ HF_METRICS_BATCH_SIZE=100,
	LINE_RECEIVER_INTERFACE='0.0.0.0',
	LINE_RECEIVER_PORT=2003,
	ENABLE_UDP_LISTENER=False,
	UDP_RECEIVER_INTERFACE='0.0.0.0',
	UDP_RECEIVER_PORT=2003,
	diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
	--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400
	+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400
	@@ -16,6 +16,7 @@
	import os
	import time
	from os.path import join, exists, dirname, basename
	+from operator import itemgetter

	import whisper
	from carbon import state
	@@ -43,7 +44,7 @@
	metrics = MetricCache.counts()

	t = time.time()
	- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
	+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
	log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

	if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
	@@ -70,6 +71,11 @@
	pass

	continue
	+ else:
	+ if reactor.running:
	+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
	+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
	+ continue

	try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
	datapoints = MetricCache.pop(metric)
	@@ -148,7 +154,7 @@

	# Avoid churning CPU when only new metrics are in the cache
	if not dataWritten:
	- time.sleep(0.1)
	+ time.sleep(1)


	def writeForever():
	diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
	--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400
	+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400
	@@ -56,6 +56,15 @@
	# the files quickly but at the risk of slowing I/O down considerably for a while.
	MAX_CREATES_PER_MINUTE = 50

	+# Do not allow small updates for high-frequency metrics. Frequent small updates
	+# clobber I/O performance and significantly decrease overall system performance.
	+# These two settings describe how many datapoints are required for an update to
	+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
	+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
	+# You can comment out these settings to disable this feature.
	+HF_METRICS_PREFIX = nodes
	+HF_METRICS_BATCH_SIZE = 500
	+
	LINE_RECEIVER_INTERFACE = 0.0.0.0
	LINE_RECEIVER_PORT = 2003

	michael@saison:~/src/projects/graphite-web$ echo ~/Downloads/*.patch
	/Users/michael/Downloads/hf-batch.patch
	michael@saison:~/src/projects/graphite-web$ cat ~/Downloads/hf-batch.patch
	diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/conf.py /opt/graphite/lib/carbon/conf.py
	--- /home/sandello/carbon-0.9.9/lib/carbon/conf.py 2011-10-05 12:30:07.000000000 +0400
	+++ /opt/graphite/lib/carbon/conf.py 2011-11-14 03:25:08.736500223 +0400
	@@ -32,8 +32,10 @@
	MAX_CACHE_SIZE=float('inf'),
	MAX_UPDATES_PER_SECOND=1000,
	MAX_CREATES_PER_MINUTE=float('inf'),
	+ HF_METRICS_PREFIX='',
	+ HF_METRICS_BATCH_SIZE=100,
	LINE_RECEIVER_INTERFACE='0.0.0.0',
	LINE_RECEIVER_PORT=2003,
	ENABLE_UDP_LISTENER=False,
	UDP_RECEIVER_INTERFACE='0.0.0.0',
	UDP_RECEIVER_PORT=2003,
	diff -uirb /home/sandello/carbon-0.9.9/lib/carbon/writer.py /opt/graphite/lib/carbon/writer.py
	--- /home/sandello/carbon-0.9.9/lib/carbon/writer.py 2011-10-05 13:39:22.000000000 +0400
	+++ /opt/graphite/lib/carbon/writer.py 2011-11-24 17:38:07.736541606 +0400
	@@ -16,6 +16,7 @@
	import os
	import time
	from os.path import join, exists, dirname, basename
	+from operator import itemgetter

	import whisper
	from carbon import state
	@@ -43,7 +44,7 @@
	metrics = MetricCache.counts()

	t = time.time()
	- metrics.sort(key=lambda item: item[1], reverse=True) # by queue size, descending
	+ metrics.sort(key=itemgetter(1), reverse=True) # by queue size, descending
	log.msg("Sorted %d cache queues in %.6f seconds" % (len(metrics), time.time() - t))

	if state.cacheTooFull and MetricCache.size < CACHE_SIZE_LOW_WATERMARK:
	@@ -70,6 +71,11 @@
	pass

	continue
	+ else:
	+ if reactor.running:
	+ if settings.HF_METRICS_PREFIX and metric.startswith(settings.HF_METRICS_PREFIX):
	+ if settings.HF_METRICS_BATCH_SIZE and queueSize < settings.HF_METRICS_BATCH_SIZE:
	+ continue

	try: # metrics can momentarily disappear from the MetricCache due to the implementation of MetricCache.store()
	datapoints = MetricCache.pop(metric)
	@@ -148,7 +154,7 @@

	# Avoid churning CPU when only new metrics are in the cache
	if not dataWritten:
	- time.sleep(0.1)
	+ time.sleep(1)


	def writeForever():
	diff -uirb /home/sandello/carbon-0.9.9/conf/carbon.conf.example /opt/graphite/conf/carbon.conf.example
	--- /home/sandello/carbon-0.9.9/conf/carbon.conf.example 2011-10-05 11:17:52.000000000 +0400
	+++ /opt/graphite/conf/carbon.conf.example 2012-05-11 11:46:25.902738378 +0400
	@@ -56,6 +56,15 @@
	# the files quickly but at the risk of slowing I/O down considerably for a while.
	MAX_CREATES_PER_MINUTE = 50

	+# Do not allow small updates for high-frequency metrics. Frequent small updates
	+# clobber I/O performance and significantly decrease overall system performance.
	+# These two settings describe how many datapoints are required for an update to
	+# be issued (HF_METRICS_BATCH_SIZE) and which metrics are subject to batching
	+# (HF_METRICS_PREFIX). Note that metrics are simply prefix-matched.
	+# You can comment out these settings to disable this feature.
	+HF_METRICS_PREFIX = nodes
	+HF_METRICS_BATCH_SIZE = 500
	+
	LINE_RECEIVER_INTERFACE = 0.0.0.0
	LINE_RECEIVER_PORT = 2003