# jonahkagan/waterfall-memory-test.coffee

## waterfall-memory-test.coffee
async = require 'async'
{inspect} = require 'util'
memwatch = require 'memwatch'

# Install
#   npm install async memwatch
# Run
#   coffee waterfall-memory-test.coffee

# The goal here is to investigate whether async.waterfall leaks memory if you
# exit a task early without calling its callback by calling a callback from an
# outer scope.

# This program runs a waterfall and prints out heap diffs. In the heap diffs,
# you should see an increase in the number of LeakingClass instances allocated
# if there is a leak (like this example: https://github.com/lloyd/node-memwatch#heap-diffing)

# Experiment switch. While true, the second waterfall task in outer_fn leaves
# the waterfall early by invoking the outer callback rather than its own.
SHORT_CIRCUIT_WATERFALL = true

# Control switch. While true, big_closure deliberately leaks by pushing every
# batch of LeakingClass instances onto the module-level `leaks` array, where
# they stay reachable and therefore cannot be garbage collected. Useful only
# to see what a real leak looks like in the heap diffs.
INDUCE_UNRELATED_LEAK = false

# Heap snapshot currently in progress. outer_fn ends it, prints the diff and
# replaces it with a fresh snapshot every 1000 iterations.
hd = new memwatch.HeapDiff
#memwatch.on 'stats', console.log
#memwatch.on 'leak', (info) ->
#  console.log info
#  console.log inspect hd.end(), depth: null
#  hd = new memwatch.HeapDiff()

# Empty marker class with no members. Its instances are what the heap diffs
# are inspected for: a growing LeakingClass count indicates a leak.
class LeakingClass

# Module-level array that big_closure pushes instance batches onto when
# INDUCE_UNRELATED_LEAK is on. Nothing in this file removes entries from it.
leaks = []

# Builds a waterfall task whose closure holds on to 100 fresh LeakingClass
# instances. If the waterfall retained its tasks after being abandoned, those
# instances would stay alive and show up in the heap diffs.
#
# Returns a function (cb) that references the instances and then schedules cb
# with setImmediate.
big_closure = ->
  leaking_instances = (new LeakingClass() for n in [1..100])
  if INDUCE_UNRELATED_LEAK
    # Deliberate leak: the module-level array keeps this batch reachable.
    leaks.push leaking_instances
  (cb) ->
    # Bare reference so the returned function actually closes over the batch.
    leaking_instances
    setImmediate cb

# Iterator passed to async.eachSeries: runs one three-task waterfall.
#
# i        - current item from the series; every multiple of 1000 triggers a
#            heap report.
# outer_cb - eachSeries callback that advances to the next item. It is invoked
#            either from inside the second task (short-circuit mode) or from
#            the waterfall's final callback.
outer_fn = (i, outer_cb) ->
  report_due = i % 1000 is 0
  if report_due
    # hd.end() forces garbage collection
    heap_diff = hd.end()
    console.log inspect(heap_diff, depth: null)
    hd = new memwatch.HeapDiff()
    console.log process.memoryUsage()

  tasks = [
    (next) -> setImmediate next
    # In short-circuit mode `next` is never called, so the waterfall is
    # abandoned here and the third task never runs.
    (next) -> setImmediate(if SHORT_CIRCUIT_WATERFALL then outer_cb else next)
    big_closure()
  ]
  async.waterfall tasks, -> outer_cb()

# Run outer_fn for the items 1 through 10000, one at a time, then report
# completion.
iterations = [1..10000]
async.eachSeries iterations, outer_fn, ->
  console.log 'done'

# Results
#
# When I run this with or without short-circuiting, I don't see any change in
# the count for LeakingClass, which leads me to believe that all allocated
# instances are garbage collected, so it's safe to short-circuit a waterfall
# this way.
#
# That means if you need to exit a waterfall in a task, you can wrap the entire
# waterfall in a function and immediately call it on the callback you were
# going to use at the end of the waterfall originally.
#
# For example, consider the following waterfall. Here, we want to short-circuit
# the waterfall if result1 is undefined, so we pass undefined results through
# the rest of the waterfall, checking for undefined at the beginning of every
# subsequent task. This sucks.
# Illustrative snippet only: this anonymous function is never invoked, and
# result1..result3 stand in for values computed in the elided "# ..." parts.
#
# Pass-through style: every task after the first has to check whether its
# input is undefined and, if so, forward "nothing" to the next task.
->
  async.waterfall [
    (cb_wf) ->
      # ...
      # Waterfall callbacks are error-first: a non-null first argument would
      # be treated as an error and abort the waterfall, so results follow null.
      cb_wf null, result1
    (result1, cb_wf) ->
      # Forward an explicit undefined so the next task still receives
      # (result, callback); calling cb_wf() with no result would shift the
      # callback into the result slot.
      return cb_wf(null, undefined) unless result1?
      # ...
      cb_wf null, result2
    (result2, cb_wf) ->
      return cb_wf(null, undefined) unless result2?
      # ...
      cb_wf null, result3
  ], (err, final_result) ->
    # ... do something with final_result, which may be undefined ...

# Instead, we can wrap the entire waterfall in a function, which we apply
# immediately to the original final callback. Then this final callback will be
# in scope within the waterfall, so we can use it to exit the entire waterfall
# in the second task without having to do any handling in subsequent tasks.
# Illustrative snippet only: this anonymous function is never invoked, and
# result1..result3 stand in for values computed in the elided "# ..." parts.
#
# Wrapped style: the waterfall lives inside a function that is applied
# immediately to the final callback, which puts that callback (final_cb) in
# scope for every task so any task can exit the whole waterfall directly.
->
  ((final_cb) ->
    async.waterfall [
      (cb_wf) ->
        # ...
        # Waterfall callbacks are error-first, so results follow null.
        cb_wf null, result1
      (result1, cb_wf) ->
        # Early exit: skip the remaining tasks by calling the final callback.
        return final_cb() unless result1?
        # ...
        cb_wf null, result2
      (result2, cb_wf) ->
        # ...
        cb_wf null, result3
    ], final_cb) (err, final_result) ->
      # ... do something with final_result, which may be undefined ...
	async = require 'async'
	{inspect} = require 'util'
	memwatch = require 'memwatch'

	# Install
	# npm install async memwatch
	# Run
	# coffee waterfall-memory-test.coffee

	# The goal here is to investigate whether async.waterfall leaks memory if you
	# exit a task early without calling its callback by calling a callback from an
	# outer scope.

	# This program runs a waterfall and prints out heap diffs. In the heap diffs,
	# you should see an increase in the number of LeakingClass instances allocated
	# if there is a leak (like this example: https://github.com/lloyd/node-memwatch#heap-diffing)

	# Experiment switch. While true, the second waterfall task in outer_fn leaves
	# the waterfall early by invoking the outer callback rather than its own.
	SHORT_CIRCUIT_WATERFALL = true

	# Control switch. While true, big_closure deliberately leaks by pushing every
	# batch of LeakingClass instances onto the module-level `leaks` array, where
	# they stay reachable and therefore cannot be garbage collected.
	INDUCE_UNRELATED_LEAK = false

	# Heap snapshot currently in progress; outer_fn is written to end it and
	# start a new one at each report.
	hd = new memwatch.HeapDiff
	#memwatch.on 'stats', console.log
	#memwatch.on 'leak', (info) ->
	# console.log info
	# console.log inspect hd.end(), depth: null
	# hd = new memwatch.HeapDiff()

	# Empty marker class with no members. Its instances are what the heap diffs
	# are inspected for: a growing LeakingClass count indicates a leak.
	class LeakingClass

	# Array that big_closure is meant to push instance batches onto when
	# INDUCE_UNRELATED_LEAK is on. Nothing in this file removes entries from it.
	leaks = []

	# Builds a waterfall task whose closure holds on to 100 fresh LeakingClass
	# instances, so a retained task would be visible in the heap diffs.
	#
	# Returns a function (cb) that references the instances and then schedules
	# cb with setImmediate.
	#
	# The body is nested under the function header again; with the indentation
	# stripped the function was empty and its statements ran at the outer level.
	big_closure = ->
		leaking_instances = [1..100].map -> new LeakingClass
		# Deliberate leak, only when the control switch is on.
		leaks.push leaking_instances if INDUCE_UNRELATED_LEAK
		# The bare reference makes the returned function close over the batch.
		(cb) -> leaking_instances; setImmediate cb

	# Iterator passed to async.eachSeries: runs one three-task waterfall.
	#
	# i        - current item from the series; every multiple of 1000 triggers
	#            a heap report.
	# outer_cb - eachSeries callback that advances to the next item. It is
	#            invoked either from inside the second task (short-circuit
	#            mode) or from the waterfall's final callback.
	#
	# Nesting is restored here: without it the `if` had no block and the task
	# list was not a valid array literal.
	outer_fn = (i, outer_cb) ->
		if i % 1000 is 0
			# hd.end() forces garbage collection
			console.log inspect hd.end(), depth: null
			hd = new memwatch.HeapDiff()
			console.log process.memoryUsage()
		async.waterfall [
			(cb_wf1) -> setImmediate cb_wf1
			# In short-circuit mode cb_wf2 is never called, so the waterfall is
			# abandoned here and the third task never runs.
			(cb_wf2) -> setImmediate if SHORT_CIRCUIT_WATERFALL then outer_cb else cb_wf2
			big_closure()
		], -> outer_cb()

	# Run outer_fn for the items 1 through 10000, one at a time, then report
	# completion.
	async.eachSeries [1..10000], outer_fn, ->
		console.log 'done'

	# Results
	#
	# When I run this with or without short-circuiting, I don't see any change in
	# the count for LeakingClass, which leads me to believe that all allocated
	# instances are garbage collected, so it's safe to short-circuit a waterfall
	# this way.
	#
	# That means if you need to exit a waterfall in a task, you can wrap the entire
	# waterfall in a function and immediately call it on the callback you were
	# going to use at the end of the waterfall originally.
	#
	# For example, consider the following waterfall. Here, we want to short-circuit
	# the waterfall if result1 is undefined, so we pass undefined results through
	# the rest of the waterfall, checking for undefined at the beginning of every
	# subsequent task. This sucks.
	# Illustrative snippet only: this anonymous function is never invoked, and
	# result1..result3 stand in for values computed in the elided "# ..." parts.
	#
	# Pass-through style: every task after the first has to check whether its
	# input is undefined and, if so, forward "nothing" to the next task.
	->
		async.waterfall [
			(cb_wf) ->
				# ...
				# Waterfall callbacks are error-first: a non-null first argument
				# would abort the waterfall, so results follow null.
				cb_wf null, result1
			(result1, cb_wf) ->
				# Forward an explicit undefined so the next task still receives
				# (result, callback) in that order.
				return cb_wf(null, undefined) unless result1?
				# ...
				cb_wf null, result2
			(result2, cb_wf) ->
				return cb_wf(null, undefined) unless result2?
				# ...
				cb_wf null, result3
		], (err, final_result) ->
			# ... do something with final_result, which may be undefined ...

	# Instead, we can wrap the entire waterfall in a function, which we apply
	# immediately to the original final callback. Then this final callback will be
	# in scope within the waterfall, so we can use it to exit the entire waterfall
	# in the second task without having to do any handling in subsequent tasks.
	# Illustrative snippet only: this anonymous function is never invoked, and
	# result1..result3 stand in for values computed in the elided "# ..." parts.
	#
	# Wrapped style: the waterfall lives inside a function that is applied
	# immediately to the final callback, which puts that callback (final_cb) in
	# scope for every task so any task can exit the whole waterfall directly.
	->
		((final_cb) ->
			async.waterfall [
				(cb_wf) ->
					# ...
					# Waterfall callbacks are error-first, so results follow null.
					cb_wf null, result1
				(result1, cb_wf) ->
					# Early exit: skip the remaining tasks via the final callback.
					return final_cb() unless result1?
					# ...
					cb_wf null, result2
				(result2, cb_wf) ->
					# ...
					cb_wf null, result3
			], final_cb) (err, final_result) ->
				# ... do something with final_result, which may be undefined ...