pluma/bl-explained.js

## bl-explained.js
var DuplexStream = require('readable-stream').Duplex,
  // A duplex stream is a combination of a read stream and a write stream
  // For more on streams see http://nodeschool.io/#stream-adventure
  util = require('util')
  // The "util" module contains several node core utilities.
  // In this case we're just using its inheritance helpers.
  // See http://nodejs.org/api/util.html for API docs.

function BufferList(callback) {
  // Constructor for a list of Buffer objects that is also a duplex stream.
  // The single argument is named "callback" but may be any one of:
  // - a function, stored and later invoked once with an error or the result
  // - a Buffer, appended as the initial content
  // - an array, whose Buffer entries are appended in order
  // Anything else is ignored.

  // Allow calling without "new": re-enter as a proper constructor call.
  if (!(this instanceof BufferList)) return new BufferList(callback)

  var self = this

  self.length = 0 // total number of bytes across all stored Buffers
  self._bufs = [] // the stored Buffers, in insertion order

  if (typeof callback == 'function') {
    // The underscore prefix marks the property as an implementation detail.
    self._callback = callback

    // One-shot error forwarder: hands the error to the stored callback and
    // then clears it, so the callback can never be invoked a second time.
    var forwardError = function(err) {
      if (!self._callback) return
      self._callback(err)
      self._callback = null
    }

    // When a source stream is piped into this list, listen for its errors.
    // A stream that emits "error" without a listener throws, so this is
    // what routes a failing source to the callback instead.
    self.on('pipe', function(src) {
      src.on('error', forwardError)
    })

    // Mirror image of the handler above: stop listening once unpiped.
    self.on('unpipe', function(src) {
      src.removeListener('error', forwardError)
    })
  } else if (Buffer.isBuffer(callback)) {
    // Single Buffer: new BufferList(buffer)
    self.append(callback)
  } else if (Array.isArray(callback)) {
    // Array of Buffers: entries that are not Buffers are skipped silently.
    for (var i = 0; i < callback.length; i++) {
      if (Buffer.isBuffer(callback[i])) self.append(callback[i])
    }
  }

  // Run the parent constructor on this object so it performs its own
  // initialisation, as is usual for constructor-based inheritance.
  DuplexStream.call(self)
}

util.inherits(BufferList, DuplexStream)
// "inherits" from node's util module wires up the prototype chain between two
// constructors, giving a real inheritance relationship similar to what you would
// find in e.g. Java.
// All you need to understand for now is that BufferList.prototype is now linked to
// DuplexStream.prototype: BufferList instances can use DuplexStream's methods, and
// "instanceof" checks of a BufferList instance against DuplexStream return "true".

BufferList.prototype._offset = function(offset) {
  // Internal helper (hence the underscore). Pretending the whole list were
  // one big Buffer, translate an absolute byte offset into a pair:
  //   [index of the Buffer holding that byte, offset inside that Buffer]
  // Example: with two Buffers of 5 bytes each, offset 7 maps to [1, 2].
  // When the list is empty, or the offset lies at or beyond the combined
  // length, the loop simply runs out and the result is "undefined".
  var consumed = 0 // bytes held by the Buffers already walked past
  var index = 0

  while (index < this._bufs.length) {
    // "upper" is the absolute offset one past the last byte of this Buffer.
    var upper = consumed + this._bufs[index].length
    if (offset < upper) return [index, offset - consumed]
    consumed = upper
    index++
  }
}

BufferList.prototype.append = function(buf) {
  // Adds "buf" to the end of the list and returns the list itself, which
  // allows chaining: list.append('hello').append(' ').append('world')
  //
  // A value that is not already a Buffer is first wrapped in a new Buffer,
  // so anything the Buffer constructor accepts can be appended.
  // NOTE(review): "new Buffer(...)" is deprecated in current Node.js releases
  // in favour of Buffer.from / Buffer.alloc; it is kept here to preserve the
  // set of accepted arguments and to match the rest of this file.
  var chunk = Buffer.isBuffer(buf) ? buf : new Buffer(buf)

  this._bufs.push(chunk)

  // Count the bytes of the Buffer that was actually stored, not the "length"
  // of the original argument. The two differ for strings containing
  // multi-byte characters ('é'.length is 1, but it encodes to 2 bytes), and
  // a number has no "length" at all, which would turn this.length into NaN.
  this.length += chunk.length

  return this
}

BufferList.prototype._write = function(buf, encoding, callback) {
  // Internal write hook: stores the incoming chunk by appending it to the
  // list, then signals completion through "callback" if one was given.
  // "encoding" is accepted but never used here; the three-argument shape
  // presumably matches what the stream base class passes to this hook.
  this.append(buf)

  // Plain truthiness check: any truthy "callback" is invoked as a function.
  if (!callback) return
  callback()
}

BufferList.prototype._read = function(size) {
  // Internal read hook. "push" is not defined in this module; it is
  // inherited from the stream base class and hands data to the reader.
  if (this.length) {
    // Never hand out more bytes than the list currently holds.
    var count = Math.min(size, this.length)

    // Push a Buffer holding the first "count" bytes of the list ...
    this.push(this.slice(0, count))
    // ... and then drop those same bytes from the front of the list.
    this.consume(count)
    return
  }

  // Nothing stored (length is falsey, i.e. normally 0): push null instead
  // of data and return whatever "push" returns.
  return this.push(null)
}

BufferList.prototype.end = function(chunk, encoding, callback) {
  // Overrides the "end" method inherited from DuplexStream in order to
  // deliver the collected data to the constructor callback.
  //
  // Because this is an override, the original method has to be called
  // manually, as a method of "this". All three arguments of the stream
  // "end" signature are passed through: forwarding only "chunk" would
  // silently drop an encoding or completion callback supplied by the caller.
  // Arguments the caller omitted arrive as "undefined", exactly as if they
  // had not been passed.
  DuplexStream.prototype.end.call(this, chunk, encoding, callback)

  // If a callback was given to the constructor and has not fired yet, call
  // it node-style: "null" as the first argument means "no error", the second
  // argument is the result, a single Buffer holding every byte in the list
  // (slice with no arguments).
  // Clearing the property afterwards guarantees it is only ever called once.
  if (this._callback) {
    this._callback(null, this.slice())
    this._callback = null
  }
}

BufferList.prototype.get = function(index) {
  // Returns the single byte stored at "index", as a number.
  // It takes a one-byte slice starting at "index" and reads its first
  // element, so where "slice" hands back a Buffer, "get" hands back the
  // byte value itself (or "undefined" when the slice comes back empty).
  var oneByte = this.slice(index, index + 1)
  return oneByte[0]
}

BufferList.prototype.slice = function(start, end) {
  // Returns the bytes between the offsets "start" and "end".
  // This is only a shorthand for "copy": with no destination buffer (null)
  // and a destination offset of zero, "copy" produces the result Buffer
  // itself instead of writing into one supplied by the caller.
  var noDestination = null
  return this.copy(noDestination, 0, start, end)
}

BufferList.prototype.copy = function(dst, dstStart, srcStart, srcEnd) {
  // Copies the bytes between the absolute offsets "srcStart" (inclusive) and
  // "srcEnd" (exclusive) of this list. "dst" = destination, "src" = source.
  // - With a destination Buffer "dst" ("copy mode"), the bytes are written
  //   into it starting at "dstStart", and "dst" itself is returned.
  // - Without one ("slice mode"), a Buffer holding the bytes is returned.
  // Either way the return value is always a Buffer.

  // Sanitise the range: anything that is not a number, or that lies outside
  // of 0..this.length, falls back to the respective end of the list.
  if (typeof srcStart != 'number' || srcStart < 0) srcStart = 0
  if (typeof srcEnd != 'number' || srcEnd > this.length) srcEnd = this.length
  // A range starting past the data or ending before it holds no bytes:
  // hand back the untouched destination, or an empty Buffer.
  if (srcStart >= this.length) return dst || new Buffer(0)
  if (srcEnd <= 0) return dst || new Buffer(0)

  var copy = !!dst,
    // Double negation turns "was a destination passed?" into a real boolean.
    off = this._offset(srcStart),
    // "off" is [index of the Buffer containing srcStart, offset within it].
    len = srcEnd - srcStart,
    // Total number of bytes requested.
    bytes = len,
    // Bytes still left to copy; counted down by the loop further below.
    bufoff = (copy && dstStart) || 0,
    // Write position in the destination: "dstStart" in copy mode if it is
    // truthy, otherwise 0.
    start = off[1],
    // Read position within the Buffer currently being copied from.
    l, i

  // Case 1: the whole list was requested.
  if (srcStart === 0 && srcEnd == this.length) {
    // Slice mode: one concatenated Buffer is all that is needed.
    if (!copy) return Buffer.concat(this._bufs)

    // Copy mode: write every Buffer into the destination, back to back.
    for (i = 0; i < this._bufs.length; i++) {
      this._bufs[i].copy(dst, bufoff)
      bufoff += this._bufs[i].length
    }

    return dst
  }

  // Case 2: the requested range lies entirely within a single Buffer.
  if (bytes <= this._bufs[off[0]].length - start) {
    if (!copy) return this._bufs[off[0]].slice(start, start + bytes)

    // Buffer's own "copy" returns the number of bytes copied, not the
    // destination, so its result must not be returned directly: every other
    // path of this method returns the destination Buffer.
    this._bufs[off[0]].copy(dst, bufoff, start, start + bytes)
    return dst
  }

  // Case 3: the range spans several Buffers. In slice mode allocate a
  // result of the right size; from here on both modes share the same logic.
  if (!copy)
    dst = new Buffer(len)

  for (i = off[0]; i < this._bufs.length; i++) {
    // Bytes available in this Buffer from the read position onwards.
    // "start" is only non-zero for the first Buffer visited.
    l = this._bufs[i].length - start

    if (bytes > l) {
      // More is needed than this Buffer holds: take all of it and go on.
      this._bufs[i].copy(dst, bufoff, start)
    } else {
      // The remainder fits into this Buffer: take exactly that much, done.
      this._bufs[i].copy(dst, bufoff, start, start + bytes)
      break
    }

    // Advance the write position and count down what is left to copy.
    bufoff += l
    bytes -= l

    // Every Buffer after the first one is read from its beginning.
    if (start) start = 0
  }

  return dst
}

BufferList.prototype.toString = function(encoding, start, end) {
  // Emulates Buffer's toString: decodes the bytes of this list, optionally
  // limited to the range between the offsets "start" and "end", into a
  // string using the given encoding.
  // The requested range is first gathered into one Buffer via "slice", and
  // the actual decoding is left to that Buffer's own toString method.
  var bytes = this.slice(start, end)
  return bytes.toString(encoding)
}

BufferList.prototype.consume = function(bytes) {
  // Deletes the first "bytes" bytes from the front of the list and returns
  // the list itself to allow chaining. Asking for more bytes than the list
  // holds simply empties it.

  // Nothing to do for zero, and nothing sensible to do for a negative or
  // missing count (or NaN): bail out before such a value can be subtracted
  // from this.length and leave the list in an inconsistent state.
  if (!(bytes > 0)) return this

  while (this._bufs.length) {
    var head = this._bufs[0]

    if (bytes >= head.length) {
      // The first Buffer is consumed entirely: account for its bytes and
      // drop it from the list. Using ">=" (rather than ">") matters when the
      // count matches the Buffer exactly; otherwise a zero-length Buffer
      // would be left behind at the front of the list.
      bytes -= head.length
      this.length -= head.length
      this._bufs.shift()
    } else {
      // The first Buffer is only partly consumed: replace it with the part
      // that remains after the consumed bytes and adjust the total.
      this._bufs[0] = head.slice(bytes)
      this.length -= bytes
      // Stop here: Buffers are left in the list, so the loop condition
      // alone would never end the loop.
      break
    }
  }

  return this
}

BufferList.prototype.duplicate = function() {
  // Duplicates ("clones") this list: returns a new BufferList holding the
  // same Buffers in the same order.
  // This is a shallow copy. The Buffer objects themselves are shared between
  // the original and the duplicate; only the list around them is new.
  var twin = new BufferList()

  this._bufs.forEach(function(buf) {
    twin.append(buf)
  })

  // "this" cannot be returned here as elsewhere: the result is the new list.
  return twin
}

;
// The lone semicolon above is needed because this file relies on automatic
// semicolon insertion (ASI), i.e. it does not terminate statements itself.
// JavaScript mostly ignores line breaks, so without it the opening parenthesis
// below would be read as a call of the function expression that precedes it
// (function() {/* body */}(/* arguments */)).
// ASI proponents sometimes write a negation (!) in front instead.
(function() {
  // Maps the name of each supported Buffer "read" method to the number of
  // bytes that method consumes.
  var widths = {
    'readDoubleBE': 8,
    'readDoubleLE': 8,
    'readFloatBE': 4,
    'readFloatLE': 4,
    'readInt32BE': 4,
    'readInt32LE': 4,
    'readUInt32BE': 4,
    'readUInt32LE': 4,
    'readInt16BE': 2,
    'readInt16LE': 2,
    'readUInt16BE': 2,
    'readUInt16LE': 2,
    'readInt8': 1,
    'readUInt8': 1
  }

  // Defines BufferList.prototype[name] for one entry of the table above.
  // Doing this in a function of its own gives every method a private
  // "name": a loop variable used directly would be shared by all of them
  // and would hold the last name by the time any method is called.
  function installReader(name) {
    var width = widths[name]

    BufferList.prototype[name] = function(offset) {
      // The value may be spread over several Buffers of the list, so the
      // bytes are first gathered into one Buffer with "slice". That Buffer
      // starts at the value, hence the hard-coded read offset of zero
      // passed to Buffer's method of the same name.
      var window = this.slice(offset, offset + width)
      return window[name](0)
    }
  }

  for (var name in widths) installReader(name)
}())
// The function expression above is wrapped in parentheses and called on the
// spot: an immediately invoked function expression (IIFE). It keeps the table
// and the helper out of the module's top-level scope.

module.exports = BufferList
// The BufferList constructor is the single value this module exports.
//
// There are two ways to export values from a node module:
// 1. Defining properties on the "exports" variable,
//    e.g. exports.foo = 'bar'
//    which lets you use the module like this:
//    console.log(require('bl').foo) // "bar"
// 2. Assigning the "exports" property of the "module" variable,
//    e.g. module.exports = 'bar'
//    which lets you use the module like this:
//    console.log(require('bl')) // "bar"
//
// Because it is generally considered best practice in the node community to have a single
// export per module, most people always use "module.exports".
// Generally speaking, you probably only want to use the "exports" way if you want to export
// multiple things and can't clearly identify one of them as the primary purpose of the module.
// That is also a good indicator that you might want to split the module up.