public
Last active

Request for Comments on new API for Step

  • Download Gist
twostep.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
module.exports = TwoStep;
 
var slice = Array.prototype.slice;
 
function Group(callback) {
this.args = [null];
this.left = 0;
this.callback = callback;
this.isDone = false;
}
 
Group.prototype.done = function done() {
if (this.isDone) return;
this.isDone = true;
this.callback.apply(null, this.args);
};
 
Group.prototype.error = function error(err) {
if (this.isDone) return;
this.isDone = true;
var callback = this.callback;
callback(err);
};
 
// Simple utility for passing a sync value to the next step.
Group.prototype.pass = function pass() {
var values = slice.call(arguments);
for (var i = 0, l = values.length; i < l; i++) {
this.args.push(values[i]);
}
};
 
// Register a slot in the next step and return a callback
Group.prototype.slot = function slot() {
var group = this;
var index = group.args.length;
group.args.length++;
group.left++;
return function (err, data) {
if (err) return group.error(err);
group.args[index] = data;
if (--group.left === 0) group.done();
};
}
 
// Creates a nested group where several callbacks go into a single array.
Group.prototype.makeGroup = function makeGroup() {
var group = this;
var index = this.args.length;
this.args.length++;
group.left++;
return new Group(function (err) {
if (err) return group.error(err);
var data = slice.call(arguments, 1);
group.args[index] = data;
if (--group.left === 0) group.done();
});
};
 
// Expose just for fun and extensibility
TwoStep.Group = Group;
 
// Stepper function
function exec(steps, args, callback) {
var pos = 0;
next.apply(null, args);
function next() {
var step = steps[pos++];
if (!step) {
callback && callback.apply(null, arguments);
return;
}
var group = new Group(next);
step.apply(group, arguments);
if (group.left === 0) group.done();
}
}
 
// Execute steps immedietly
function TwoStep() {
exec(slice.call(arguments), []);
}
 
 
// Create a composite function with steps built-in
TwoStep.fn = function () {
var steps = slice.call(arguments);
return function () {
var args = slice.call(arguments);
var callback = args.pop();
exec(steps, args, callback);
};
}
usage-fn.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
var TwoStep = require('./twostep');
var FS = require('fs');
var Path = require('path');
 
// Create a composite function using TwoStep.fn
var statdir = TwoStep.fn(
function (directory) {
this.pass(directory);
FS.readdir(directory, this.slot());
},
function (err, directory, fileNames) {
if (err) return this.error(err);
this.pass(directory, fileNames);
var group = this.makeGroup();
fileNames.forEach(function (name) {
FS.stat(name, group.slot());
});
},
function (err, directory, filenames, stats) {
if (err) return this.error(err);
var output = {};
filenames.forEach(function (name, i) {
var path = Path.join(directory, name);
output[path] = stats[i];
});
this.pass(output);
}
);
 
statdir(__dirname, function (err, stats) {
if (err) throw err;
console.log("Stats", stats);
})
usage.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
var TwoStep = require('./twostep');
var FS = require('fs');
 
TwoStep(
function one() {
this.pass(__filename + ".bak");
FS.readFile(__filename, 'utf8', this.slot());
},
function two(err, target, contents) {
if (err) throw err;
this.pass(target);
FS.writeFile(target, contents, this.slot())
},
function three(err, target) {
if (err) throw err;
console.log("%s written to successfully", target);
FS.readdir(__dirname, this.slot());
},
function four(err, fileNames) {
if (err) throw err;
this.pass(fileNames);
var group = this.makeGroup();
fileNames.forEach(function (filename) {
FS.stat(filename, group.slot());
});
},
function five(err, fileNames, stats) {
if (err) throw err;
this.pass(fileNames.filter(function (name, i) {
return stats[i].isFile();
}));
var group = this.makeGroup();
stats.forEach(function (stat, i) {
if (stat.isFile()) FS.readFile(fileNames[i], 'utf8', group.slot());
});
},
function six(err, fileNames, contents) {
if (err) throw err;
var merged = {};
fileNames.forEach(function (name, i) {
merged[name] = contents[i].substr(0, 80);
});
console.log(merged);
}
);

I'm proposing a slightly modified version of Step's API based on years of feedback (perhaps I shouldn't show the implementation, but here you go).

This is a API breaking change from the current version so it's named TwoStep for now, but I'm open to naming it Step2 or just bumping the version of Step in npm and deprecating the old API.

Important changes:

  • There is no distinction between this and this.parallel(). Both are now this.slot()
  • There is a new primitive called "pass" that simply passes a variable to the next step. Useful for times when you don't want to create a closure to hold state. Use as this.pass(value1, value2, ...) Each value will get it's own slot in the next step.
  • The group API is more or less the same, except it's less confusing. this.group() is now this.makeGroup() and the return value is the same type of object as this so you use group.slot() on the resulting group object.
  • TwoStep.fn works much like Step.fn, but now it's actually useful thanks to this.pass()

One problem I always had in step is how to break out of the steps. I often have nested steps especially for my command line tools, so async seems to be a better fit for me in these cases. If anything, providing an elegant way to break out of the current steps would be great.

@soggle, what do you mean "break out of the current step"? Could you give an example where this is useful?

Creationix we have validation that queries multiple databases multiple times (unfortunately we cannot change this) for example:

Request
1-Validate User (checks headers)
1.1--Done
2-Validate Data (checks data looks valid format / schema)
2.1--Validate Data for DB1 (checks for conflicts on a field in DB1) ... repeats for couple fields
2.1.1---Done
2.2--Validate Data for DB2 (checks for conflicts on a field in DB2)
2.2.1---Done

Breaking out when the data is not in a good schema (2) to begin while the user validation is taking place (1). Or stopping 2.1 / 2.2 when 1.1 fails if they have not been sent yet / stopping all the other branches via a signal or something allowing cleanup such as a finally (you could wait on 2 and 1 being done as an alternative to all of this).

Please consider my own additions too: https://github.com/akidee/stepc
Step should enable a mode where it ignores return values and just steps on call to next (context can be set) or this.

I forgot to mention, this new version doesn't care about return values. The this.pass() function takes care of that need. Besides, the return stuff was not friendly at all for coffeescript users.

Good to know. This is why I created Step.async. What do you think about my second change so that you bind this, and replace Step's this by next?

Akidee, I've considered that as well. Originally I rejected it because it made the steps different from normal node callback signatures, but now I see that doesn't really matter. Though I have realised one nice side effect of using this is that it makes nesting inline callbacks within a step very painful. You have to do var self = this or something. I see this as a feature since you shouldn't nest callbacks within steps. Step's implicit try..catch around each step can't reach into nested callbacks and so you lose the exception safety net. This will matter less once node gets domains. Also in the posted code here, there is no implicit try..catch around steps. Exceptions will blow up.

If you do end up passing step in as an argument to the callbacks, I recommend passing it as the first arg (other than err) so that you don't have to track the # of arguments you expect in order to properly get the step var

The new interface looks great!

I use Step a lot in my code. Here's a particular example of a Step sequence in the block processing code in node-bitcoin-p2p: https://gist.github.com/1534875

So here are my own wishlist items that I've come up with over the last year or so:

  • A way to have errors bypass intermediate steps. In other words, I want an (optional) way of getting rid of those if (err) throw err; that clutter my steps. Suppose you had:

    Step(
      function first() {
        this(null, "foo");
      },
      function second(err, param) {
        if (err) throw err;
    
        this(null, param+"bar");
      },
      function third(err, param) {
        if (err) throw err;
    
        this(null, param, "ding");
      },
      function fourth(err, p1, p2) {
        if (err) throw err;
    
        this(null, p1, p2+"dong");
      },
      callback, // Called with (err, "foobar", "dingdong")
      handleCallbackError // Called if the callback throws an error
    );
    

    This would now be:

    TwoStep(
      function first() {
        this.catchIn(callback); // Make errors skip through to callback
        this.pass("foo");
      },
      function second(param) {
        this.pass(param+"bar");
      },
      function third(param) {
        this.pass(param, "ding");
      },
      function fourth(p1, p2) {
        this.pass(p1, p2+"dong");
      },
      callback, // Called with (err, "foobar", "dingdong")
      handleCallbackError
    );
    

    Often, you will define your steps inline in the TwoStep() call, so you won't be able to get a reference to the function you want to use for handling errors. In that case you could reference the function by name instead:

    TwoStep(
      function first() {
        this.catchIn("fourth");
        this.pass("foo");
      },
      function second(param) {
        this.pass(param+"bar");
      },
      function third(param) {
        this.pass(param, "ding");
      },
      function fourth(err, p1, p2) {
        if (err) {
          // Handle error
        } else {
          // Do stuff
        }
      }
    );
    
  • There would also be a method skipTo that would allow you to explicitly skip to a function. Example:

    TwoStep(
      function first() {
        if (weather.isBad()) {
          this.pass("foobar", "cached");
          this.skipTo("fourth");
        } else {
          this.pass("foo");
        }
      },
      function second(err, param) {
        if (err) throw err;
    
        this.pass(param+"bar");
      },
      function third(err, param) {
        if (err) throw err;
    
        this.pass(param, "ding");
      },
      function fourth(err, p1, p2) {
        if (err) {
          // Handle error
        } else {
          // Do stuff
        }
      }
    );
    
  • This last one is just an idea. Based on the way TwoStep.fn works, I wonder if it made sense to have passSticky and slotSticky methods. Rewriting the statdir example:

    var statdir = TwoStep.fn(
      function (directory) {
        this.catchLast();
        this.passSticky(directory);
        FS.readdir(directory, this.slotSticky());
      },
      function (directory, fileNames) {
        var group = this.makeGroup();
        fileNames.forEach(function (name) {
          FS.stat(name, group.slot());
        });
      },
      function (directory, filenames, stats) {
        var output = {};
        filenames.forEach(function (name, i) {
          var path = Path.join(directory, name);
          output[path] = stats[i];
        });
        this.clear();
        this.pass(output);
      }
    );
    

    The call to catchLast tells Step to forward all errors to the last function (which is the user's callback here). The calls to passSticky and slotSticky remove the need to call next.pass again on every step. I think this would be beneficial for a longer sequence, especially one where one object is just passed through and acted upon. As you can see the second step is now four lines instead of six and a lot more readable. So if you had ten intermediate steps instead of just one this may be a significant improvement.

@creationix 1) The new API is pretty OK but the change is mainly useless for me since it's doing nearly the same as Step1. this.pass() seems to be a good idea, but it's easier to define the variable outside of the step closures so you can access it in any closure. I do not see any real innovation that will make me change my code. I will stay with Step1 ;)
2) You often call Step in a certain context, and since a context is more a semantic than a structural feature (step is a structural helper, it could be part of the language itself), it is not a good idea to use this as an accessor of structural functionality.
@cwolves I made this decision because node style callbacks always use the error as the first argument, and I did not want to go against this convention. In async.js, it's similar ( https://github.com/fjakobs/async.js ). Furthermore, it's more intuitive to unshift all explicit variables. If you call next(error, value), the real call would be next(nextNext, error, value) instead of next(error, value, nextNext). (And I do not prefer to change it now since I am heavily using stepc in many of my libraries.)

@akidee - Sorry, I may have mis-worded that. I'm suggesting ( err, next, value, value, value... ). If you have, instead ( err, value, value, ..., next ) you're left either having to manually keep track of how many arguments are expected or having to pop() arguments. And regardless of how much code you have depending on anything, that's not a good reason to make a design decision.

@cwolves I will think about it (since my last argument was in brackets)

@cwolves - Why would you ever have a variable number of arguments? (When you can have a makeGroup() instead?)

Also, if you have a variable number of arguments, you have to be using arguments[] anyway, so you might as well use var next = arguments.pop().

I'm not saying variable within a single callback, I'm saying I don't like the idea of ( err, val, val, val, next ) followed by (err, next) followed by (err, val, val, val /* 20 more */, next). Nor do I like the idea of being forced to type [].pop.apply( arguments ). (err, next, ...) is a sane, consistent API. The previous examples aren't :)

Great comments everyone. It looks like I need to decide between requiring the user to wrap step in a closure or optimize the API for Step.fn style usage. I personally prefer wrapping in a closure.

I find the ideas of named gotos and named error forwarding very interesting. Perhaps a name based system would be interesting where everything including parameters are named? Then you don't need passSticky and slotSticky. Once a value is set in the shared environment it's sticky till you override it.

Perhaps a name based system would be interesting where everything including parameters are named? Then you don't need passSticky and slotSticky. Once a value is set in the shared environment it's sticky till you override it.

Would be good to see an example. I'm not sure I understand this idea!

A way to have errors bypass intermediate steps. In other words, I want an (optional) way of getting rid of those if (err) throw err; that clutter my steps

Express middleware configuration API does this job well. We shall have a separate error handler to which the errors are routed. It will be really useful to have this feature in Step to avoid a lot of boilerplate code.

@justmoon, The way you're taking advantage of named functions is damn beautiful. Nearly brought a tear to my eye. It's such an elegant solution to such an ugly problem. I hope it makes it in to Step2.

@creationix, +1 to the Step.fn API.

I know I'm a little late to the thread, but if I could be so bold, here are some general comments and opinions:

Perhaps Step could add a persistant key/value object that's passed to each callback via this.data? It would remove the need for "sticky" params and reduce the number of calls to this.pass. Here's an example of how'd you use this.data:

var statdir = Step2.fn(
  function (directory) {
    this.data.directory = directory;
    FS.readdir(directory, this.slot());
  },
  function (err, fileNames) {
    this.data.fileNames = fileNames;
    var group = this.makeGroup();
    fileNames.forEach(function (name) {
      FS.stat(name, group.slot());
    });
  },
  function (err, stats) {
    var output = {};
    var fileNames = this.data.fileNames, directory = this.data.directory;
    fileNames.forEach(function (name, i) {
      var path = Path.join(directory, name);
      output[path] = stats[i];
    });
    this.pass(output);
  }
);

I feel the that this.data helps readability as well as minimizes the API's surface area. It could also reduces verboseness in some cases -- image a 10 phased Step function where the first callback produces a value you need in the ninth.

As for the proposed this.catchIn function, I'm against it. I feel you could instead utilize the this.skipTo functionality to achieve something very similar. Here's an example:

function ohShit() {
  // Damage control...
}
Step2(
  function one() {
    // something
  },
  function two(err, val) {
    if(err) { return this.skipTo("cleanup"); }
    // stuff
  },
  function three(err, val) {
    if(err) { return this.skipTo(ohShit); } // Completely stops the Step chain
    // stuff
  },
  function four(err, val) {
    if(err) { return this.skipTo("cleanup"); }
    // Never called if there's an err in three
  }
  function cleanup(err, val) {
    if(err) { /* Handle error */ }
    // Do clean up
  }
);

I would prefer this technique because it allows/encourages error handling at each step (which is more idiomatic in node.js, IMO) and reduces the size of the API.

Also, has anyone thought of a good solution for APIs that pass more than two parameter to their callbacks? For example, request passes three parameters to nearly every callback: err, response, body. Step, unfortunately, only considers err and response, and ignores body. This is only a minor annoyance because response.body happens to equal body, but I could image other APIs where there isn't a good work around. Can anyone think of a good solution for this use case? Best I could come up with is this:

Step2(
  function one() {
    var next = this;
    api.doIt(function(err, alpha, bravo, charlie, delta) {
      next(err, { alpha: alpha, bravo: bravo, charlie: charlie, delta: delta });
    });
  },
  function two(err, args) {
    // ...
  }
);

But this seems more like a workaround than a solution.

TwoStep look's interesting. Any plans about release?

After seeing all the feedback and pondering this problem, I've come to the conclusion that step is doomed to either be too specific in goal to help most situations or become so flexible and complex and it's a system unto itself.

If people want, I can push a step update using the code above. It's simple and better than what's currently in step. I don't want to expand the scope and compete with the async lib. I would never use it myself (as it is I don't really ever use step).

My new experiment is at https://github.com/creationix/safereturn. The general idea is to not impose structure on the programmer, but rather help them write good callback based functions.

That's unfortunate.

I was personally very excited about the direction TwoStep was going in. I hope you reconsider.

In the mean while, I've decided to continue explore some of the possibilities mentioned in thread in my own repository: https://github.com/xavi-/two-step

The code is still a bit rough, but all the tests are passing. Also, because it's a new repository I felt it was a good opportunity to try out some dramatic API changes (mostly for the sake of readability). Here's a quick overview of the changes I made:

I'm also hoping to improve debuggability in TwoStep by allowing users to give each parameter a name and by attaching a "step info object" to any error objects that occur inside a step chain. The "step info object" would contain the name of the callback where the exception occurred as well the name and index of the parameter that was being calculated. Here's an example:

TwoStep(
    function callDB() {
        userdb.get(userId, this.val("user-data"));
        productdb.get(productId, this.val("product-data"));
    },
    function processPage(err, user, product) {
        if(err) {
            // err.step === { name: "callDB", paramName: "user-data", paramIdx: 1 }
        }

        /* ... process page ... */
    },
    // ...
);

I'm still trying to work out the details.

At this point I'm open to changing anything and everything. Please voice your opinions.

Thanks for good work. We used the first step, but we didn't have this.pass function. We used your gist as base and add some features - simple wrappers, try catch to steps and tests. Source can be found here https://github.com/Nordberg/node-twostep and in npm twostep.

Great work @nordberg! I mentioned your project on twitter. https://twitter.com/creationix/status/215446113422032896

I think the new API comes directly out of everyone's demand. I wish I had noticed this effort earlier.

While I'm using Step I always do this.pass() in this way:

this.parallel()(undefined, results);

Now there is an API to do it explicitly. Other changes in the new API are just name changes. They are also good because they look more intuitive than before.

And I don't return values synchronously either. If I do that my syntax checker always warns me that some branches return without values while others do return values. Therefore currently I stick with this.parallel()(undefined, results); or callback(undefined, results);, which looks pretty stupid. I should consider migrating to the new API :)

So far I'm happy with Step except for two messes. One is, as I reported and fixed, caused by the process.nextTick() check. Another is caused by try ... catch in Step. I should have reported an issue, anyway, here it is:

    try {
      lock = true;
      var result = fn.apply(next, arguments);
    } catch (e) {
      // Pass any exceptions on through the next callback
      next(e);
    }

Suppose in certain step of fn.apply(next, arguments);, fn wanna finish prematurely and call the final callback, and the final callback throws an exception, then it gets catched by catch(e){next(e);} -- and we can't finish prematurely, have to go on to next step!

With nested callbacks and exceptions things get more complicated, so I try not to throw or incur an exception in my asynchronous code.

I hope these two messes has been taken care of. Anyway I'll have a look into the code of TwoStep and try it soon:)

My attemp (I ported test from Step, but it doesn't pass all yet). Step is anyway much simpler and elegant then async. It covers 80% of use cases. For thoes 20% we can expand API if it suits.

We (@gameclosure) loved the ideas behind this gist, so we made our own "fork" of it called FF. We expanded on TwoStep by adding immediate failure and success calls (skipping the rest of the function chain), and we made the return object promise compatible. We are actively maintaining it currently, so feedback is highly welcomed.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.