[thoughts in progress ~ started 9 NOV 2013]
Lately I've been looking at the various jvm alternative languages (groovy, scala, jruby, clojure) to see which one's syntax makes life easier for us front-end typists dealing with JavaScript.
There's a nice bit of clojure on this page:
(ns rosettacode.24game
(:require [clojure.string :as str]))
(defn parse-infix-data
"input '1+2+3+4'
output (1 + 2 + 3 + 4)
where the numbers are clojure numbers, and the symbols are clojure operators"
[string] (map read-string (next (str/split string #""))))
I can't tell you how map
or read-string
work, or what next
is really doing exactly, but the structure is clear enough that I'm not mystified by ns
, require
, the str
alias, defn
, or the heredoc string (which actually does explain how this works).
This is not an argument for using yet another {?}-to-javascript transpiler such as scriptjure or clojurescript. But if you could write your JavaScript in a declarative manner, easy to read and organize, etc., without need of a source transformation a la CoffeeScript or browserify, wouldn't you just do it?
Assuming we have a shim for clojure.string like so
/**
 * Minimal stand-in for the clojure.string shim used in the examples:
 * puts a single space between every character of the input, so that
 * '1+2' becomes '1 + 2'.
 *
 * @param {string} string - text to space out
 * @returns {string} the characters of `string` joined by single spaces
 */
function clojureString(string) {
  var characters = string.split('');
  return characters.join(' ');
}
we could translate the clojure bit above straight into AMD as:
require.config({ 'str' : 'clojureString' });
define('rosettacode.24game.parseInfixData', function (require, exports, module) {
var str = require('str');
module.exports = parseInfixData;
function parseInfixData(string) {
return str(string);
};
});
This is the short form of AMD's define
method which @jrburke has done a pretty good job getting to work in requirejs - it's pretty close to ideal IMO ~ see fixing imports - but there's still something wrong with the picture.
In my years-long search for a better CommonJS/AMD API, I would have translated that to JavaScript as
ns('rosettacode.24game'); // similar to dojo.provide
importScripts({ 'str' : 'clojureString' }); // similar to dojo.require && require.config
define('parseInfixData', function (module) { // similar to dojo.declare
var str = module.require('str');
module.exports = parseInfixData;
function parseInfixData(string) {
return str(string);
};
});
Or in a chained manner:
ns('rosettacode.24game').
importScripts({ 'str' : 'clojure.string' }).
defmodule('parseInfixData', function (module) {
var str = module.require('str');
module.exports = parseInfixData;
function parseInfixData(string) {
return str(string);
};
});
There's even a Universal Module Definition variant on all this. The following example is taken from Bob Yexley's UMD: JavaScript modules that run anywhere:
(function (root, factory) {
if (typeof define === "function" && define.amd) {
define(["jquery", "underscore"], factory);
} else if (typeof exports === "object") {
module.exports = factory(require("jquery"), require("underscore"));
} else {
root.Requester = factory(root.$, root._);
}
}(this, function ($, _) {
// this is where I defined my module implementation
var Requester = { /* ... */ };
return Requester;
}));
As I was working out various alternatives, I used the following require
statement in a test (using @substack's tape):
test('inline', function (t) {
var result = require('rosettacode.24game.parseInfixData')('1+2+3+4'); // <-- chained functions
t.equal(result, '1 + 2 + 3 + 4');
t.end();
});
That seems slightly cryptic at first. The require
call returns a module.exports
object - which in this case is the parseInfixData
function - which is then invoked with the '1+2+3+4'
argument.
So, rather than returning a common object, the chain returns a common function.
It seems like a nice pattern for breaking up methods with multiple arguments and argument types into declarative invocation sequences (excuse the gratuitous ten-dollar jargon).
Now, instead of the short form of the AMD define
method which takes only the factory function parameter, let's look at the long form.
define('id', ['array', 'of', 'dependencies'], function factory(array, of, dependencies) {
// etc.
});
Here everything has to be laid out in order - the id, the array of dependencies to be loaded, and the factory function whose argument list must match the order of names in the dependency array. That's heavy (which is why the short form was developed), but it's actually pretty close to the declarative format.
The declarative or chained API version would look like this, where define
kicks off the sequence:
define('id')('array', 'of', 'dependencies')(function factory(array, of, dependencies) {
// etc.
});
I tried that with the following fake module boilerplate (where defn
replaces define
):
// Link in the chain: dispatches on the argument's type and, unless the
// argument is the factory function, returns itself so the chain can go on.
//  - function: hand it to def() and end the chain by returning undefined
//  - string:   forward the whole argument list to load()
//  - other:    ignored; the chain simply continues
function consume(arg) {
  switch (typeof arg) {
    case 'function':
      // `void` discards whatever def() returns, so the chain ends here
      return void def(arg);
    case 'string':
      load.apply(this, arguments);
      break;
  }
  return consume;
}
// Entry point of the chain: logs the module id and hands back `consume`
// so that the following call groups can be processed.
function defn(id) {
  console.log(id);
  // this is where the context for `id` would be set up before the chain starts
  var nextLink = consume;
  return nextLink;
}
// Fetch items not loaded: currently only logs the requested dependency
// names as a real array.
// Array.prototype.slice is used rather than Array.apply(0, arguments),
// because the Array constructor treats a single numeric argument as a
// length (load(3) would log three empty slots instead of [3]).
function load() {
  console.log(Array.prototype.slice.call(arguments));
  // proceed with require resolutions
}
// Last stop of the chain: logs the factory, then runs it with a fresh
// empty object as `this` and a fresh { exports: {} } object as its only
// argument. Returns nothing.
function def(fn) {
  console.log(fn);
  // (a triple-comment function body would be processed here, and the call
  // below deferred until every dependency is available)
  var context = {};
  var fakeModule = { exports: {} };
  fn.call(context, fakeModule);
  // finally, the context would be reset for the next defn() call...
}
then called this
defn('id')('array', 'of', 'dependencies')(function factory(array, of, dependencies) {
// etc.
});
and it worked, printing out
id
["array", "of", "dependencies"]
factory(array, of, dependencies)
I then broke up the same-line layout with newlines
defn('id')
('array', 'of', 'dependencies')
(function factory(array, of, dependencies) {
// etc.
});
and it worked. Then I added whitespace for some readability
defn('id')
('array', 'of', 'dependencies')
(function factory(array, of, dependencies) {
// etc.
});
and it worked. Then I removed some indentation
defn('id')
('array', 'of', 'dependencies')
(function factory(array, of, dependencies) {
// etc.
});
and it worked. Then I added grouping parentheses
(defn('id')
('array', 'of', 'dependencies')
(function factory(array, of, dependencies) {
// etc.
}));
and it worked.
Thus, a clojure-like syntax for JavaScript modules - sans significant whitespace - which is practically readable.
Not sure what to call this pattern but "chained consumer api" or "chained cpi" for short stands a reasonable chance. In this pattern, a function always returns another function unless by argument type or value it is designated last in the chain.
The chained CPI pattern has some interesting benefits:
- only one new `global` loader/definer variable to use
- only one semi-colon required per `defn()` blob - the trailing semi-colon prevents successor statements from being processed as arguments
- you can use `void` in the method that ends the sequence (rescuing a JS bad part for good)
- multiple `requires` or setup can be grouped in one blob,
- ...or stacked on separate lines
- runtimes allow spaces between blobs for readability
- indentation is optional
- parentheses can be used for grouping instead
Addy Osmani says somewhere (his Backbone or Design Patterns series) that "the elephant in the room" with respect to JavaScript is the lack of built-in module/dependency loading/resolution.
With all due respect, the elephant in the room is CommonJS and the synchronous require() method. Here are several libs and articles that attempt to deal with this for various use-cases:
- lmd
- Testing Private State and Mocking Dependencies
- injectr - suspiciously similar to the vojta jina module (previous)
- rewire
- load
- nodejs simple loader
- helios kernel
- mockery
- dependency loader ~ requirements
To make up for that in the browser, AMD specifies - and RequireJS and SeaJS both implement - this:
define(id, [list, of, dependencies], function (module, require, exports, a, b, c) {
// your mileage variance here
});
As stated earlier, that is one big method call with 6 possible argument variations. RequireJS and SeaJS both implement another variation:
define(function(module, require, exports) {
var a = require('a')
module.exports = {
getA: function () {
return a
}
}
})
RequireJS then examines the function via toString() for require statements and loads them as needed, calling the function when they are ready. (AngularJS does something similar.)
For some, this pattern is just too much:
but again, I think this version of define
is very close - it still leaves us with some configuration to do - mainly elsewhere - which I think should instead be as close to the code as possible. For that I suggest...
A configuration specifier argument for dependencies helps us out here. Instead of just paths we can provide the alias or local varName we would have used with require()
- so instead of:
(defn('id')
('a','b')
(function () {
var a = require('a');
var b = require('b');
module.exports = fn;
function fn(arg) {
return 'fn(arg): ' + a(arg) + b(arg);
}
}));
we can use a
and b
as if declared 'globally'
(defn('id')
({'a' : 'path',
'b' : 'path' })
(function () {
module.exports = fn;
function fn(arg) {
return 'fn(arg): ' + a(arg) + b(arg); // <-- pseudo globals for dependencies
}
}));
and eliminate the require
statements altogether.
For that to work we need to execute the factory function inside another function that receives the varName references as arguments. That way you do not need to specify every argument in the correct order in the factory param list.
[ TODO ~ details , maybe an implementation ]
And suddenly we have shorter code, easier on the eyes, plus:
- specifying configuration and imports in one statement allows us to mock things as necessary for tests
- we can declare/pass a `global` for `window`
- we need only specify `global.<some_name>` for anything that exports itself to the global scope rather than imports.
Alas, CoffeeScript does not support the whitespace-between-lines pattern or the triple-chars comment. You can use backticks in CoffeeScript which will include the ticked source as pure JavaScript, but that gets us out of CoffeeScript proper so what is the real win?
For the imports and factory parts, you have to write your CoffeeScript version of this as
defn("d")('a', 'b', 'c')(->
F = ->
a() + b() + c()
module.exports = F
)
to produce this:
defn("d")('a', 'b', 'c')(function() {
var F;
F = function() {
return a() + b() + c();
};
return module.exports = F;
});
However, when the imports list gets too long for a single line, each opening paren has to stay at the end of the preceding line to keep the chain in order.
So this
defn("F")(
'a', 'b', 'c')(
'd', 'e', 'f')(->
F = ->
a() + b() + c()
module.exports = F
)
or this
defn( "F" )( 'a', 'b', 'c' )(
'd', 'e', 'f'
)( ->
F = ->
a() + b() + c()
module.exports = F
)
will produce this:
defn("F")('a', 'b', 'c')('d', 'e', 'f')(function() {
var F;
F = function() {
return a() + b() + c();
};
return module.exports = F;
});
However, that feels awkward to write. Using a config spec argument for the imports part makes the CoffeeScript easier to type, and to read.
This in CoffeeScript:
(defn("F")(
a: "path"
b: "path"
c: "path"
)( ->
F = ->
a() + b() + c()
module.exports = F
))
produces this in JavaScript:
defn('F')({
'a': 'path',
'b': 'path',
'c': 'path'
})(function() {
var F;
F = function() {
return a() + b() + c();
};
return module.exports = F;
});
which you can write straight in JavaScript as
(defn('id')
({'a' : 'path',
'b' : 'path',
'c' : 'path' })
(function () {
module.exports = F;
function F(arg) {
return a() + b() + c();
}
}));
"defer javascript parsing/evaluation" has emerged as an accepted performance tip. Not surprisingly, like so many other front-end performance tips, this advice is not uniformly well-supported by browsers (i.e., a source file loaded is parsed and evaluated as soon as possible - ajax responses excepted).
We can get around that by supporting functions with special comment chars such as mstring by (@rjrodger):
(defn('main')
('a', 'b', 'c')
(function(){/*** // <-- the triple comment denotes the start of a multi-line string
console.log(module.id) // main
var a = require('a')
var b = require('b')
var c = require('c')
module.exports = function ashley() {
return module.id // "main"
}
***/}));// <-- ... and the closing triple comment denotes the end of the multi-line string
The strategy there is to parse the function body for its content between the /***
and ***/
tokens with
// Pull the deferred source out of the factory function's text.
// The lazy [\s\S]*? capture takes everything between the opening
// slash-star-star-star marker and the first closing star-star-star-slash
// marker, so the body may itself contain '*' characters (multiplication,
// nested comments). The previous [^\*]+ pattern failed to match in that case.
// `table` is null when the markers are absent; otherwise table[1] holds the
// body text to store until the factory needs to be run.
var fs = fn.toString();
var table = fs.match(/\/\*{3}([\s\S]*?)\*{3}\//);
and store that somewhere until the factory needs to be run.
The heroic Tobie Langel has addressed this approach in some detail.
So, the win for us here is that only those modules required on a page are actually parsed, evaluated and executed. The others may be loaded in stringified fashion, possibly even pre-packed together so that a site can download one file and cache it without the eval-all cost that normally brings.
This pattern gives us a few more benefits:
- no need to type the module param/argument in the factory function definition anymore
- our module loader can do the 'injecting' of various pseudo-globals (module, require, exports, global)
- our module loader can also pass other dependencies, like defer, debug, assert, or imports for the list of imported dependencies, etc.
- the triple-comment need only be used on deferred stuff - leaving the possibility of smoke testing the factory functions beforehand.
For the stringification, CoffeeScript supports backticks for pure JavaScript (again, I do not recommend that), or going with the YUI compressor copyright comment indicator - /*!
- so that
defn -> ###!
your code here
###
becomes
defn(function() {
/*!
your code here
*/
});
The copyright comment indicator may be the way to go here but I suspect we are really going to have to use a different kind of linter/compressor solution for the stringification of commented code.
update: I'd also been alerted that some browsers don't return comments in source when using function.toString(). I'm not worried yet, as those tend to be older browsers, or just incompetent mobile providers (... and who uses mobile anyway?).
JavaScript has no built-in annotation support. We can fake it with labels,
annotation:
some code here
but you have to add more symbols around them just to prevent their removal by minifiers
({ 'annotation' : some code here })
which means layering on some new capability just to beat the minifiers.
Instead of that, the cpi pattern can be used for declaring / expecting certain annotation statements:
(example('something')
('annotation')
(implementation)
);
Now that is becoming a more general pattern as I stream my conscious thought process through this concept.
The 'example' method here uses a cpi that expects annotation labels or directives, followed by implementation blocks (objects or functions).
[ __need an implementation of this, please ~ see defmacro __ ]
Take the fairly common bdd describe-it-expect pattern
describe(str, function() {
describe(str, function() {
before(function(done){
})
after(function(done){
})
it(str, function(done){
expect(a).toBe(b)
})
it(str, function(done){
expect(a).toEqual(b)
})
})
});
and turn it into a cpi variant like this:
(describe(str)
(describe(str)
(before) // a before object or annotation string
(function(done){
})
(after) // an after object or annotation string
(function(done){
})
(str) // it statement - should do something
(function(done){
expect(a).toBe(b)
})
(str) // it statement - should not do something
(function(done){
expect(a).toEqual(b)
})
)
);
Not a huge improvement but one difference stands out: There are no comma-delimited multi-argument method calls. So, again, you can structure your program vertically into declarative statements.
So the pattern underneath it all might look slightly ruby-ish:
(def(id)
(name)
(impl)
(name)
(impl)
(name)
(impl)
(end)); // maybe??
def
is a method, name
is an object or string, followed by an implementation
object or function, and end
is an object or string terminating the cpi sequence for the given def()
call.
All we need now is a way to create these macro-like invocation chains - defmacro.js - which I'll be focusing on the next few days. GOD HELP ME
based on learning tests with metafunction.
// 12 DEC 2013
// IT RUNS BUT I'm not completely happy with this yet
// having to call macro() already bugs me
// need a way to defer function executions:
// + functions in the 3rd input step in macro tests are executed immediately
// + need to expose the api of each created instance to real tests with jasmine or tape
// + maybe add a way to save a reference to the macro, like meta() or something...
// the count++ statements are unindented so they're easier to scan from the rest of the implementation.
// Instrumentation shared with the demo code that follows: `count` is bumped
// on every call that goes through the macro machinery, `start` is the load
// timestamp used for elapsed-time logging.
var count = 0;
var start = (new Date()).getTime();

/**
 * defmacro.js - builds "chained consumer" invocation sequences.
 *
 * Installs two globals:
 *
 *   defmacro(id)(entryFn)(name)(fn)(name)(fn)...
 *     Registers (or extends) the macro `id`. The first function is the entry
 *     handler, stored under the macro's own id; after that, step names and
 *     handler functions alternate.
 *
 *   macro(id)(arg)(arg)...
 *     Returns the entry handler of macro `id`. Every handler call returns
 *     the entry handler again, so calls can be chained indefinitely.
 *
 * Handlers run with `this` bound to the macro function, on which all steps
 * are stored as properties (so an entry handler can delegate with
 * `this[stepName](arg)`). Step names should therefore avoid `id`, `next`
 * and the built-in properties of functions such as `name` and `length`.
 */
;(function () {
  count++;

  // Browsers have no `global`: alias it to `window`. The typeof guard keeps
  // the check from throwing where `window` is not declared either.
  if (typeof global === 'undefined' && typeof window !== 'undefined') {
    window.global = window;
  }

  // id -> macro function; prototype-less so that ids such as 'constructor'
  // cannot resolve to inherited Object members.
  var macros = Object.create(null);

  global.macro = macro;

  /**
   * Look up the entry handler of a defined macro.
   * @param {string} id - name given to defmacro()
   * @returns {Function} the chainable entry handler
   * @throws {Error} when no macro with that id has been defined
   */
  function macro(id) {
    count++;
    var m = macros[id];
    if (!m) {
      throw new Error('no macro defined for \'' + id + '\'');
    }
    return m[id];
  }

  global.defmacro = defmacro;

  /**
   * Start (or resume) the definition of macro `id`.
   * @param {string} id - macro name; also the name of its entry step
   * @returns {Function} a setter expecting the entry handler function
   */
  function defmacro(id) {
    count++;
    var m = macros[id];
    if (!m) {
      // Calling the macro function itself delegates to the entry handler.
      // (It used to call `this.id(value)`, but `id` is a string, so that
      // always threw.)
      m = macros[id] = function () {
        count++;
        return m[m.id].apply(m, arguments);
      };
    }
    m.id = id;
    return get(m)(id);
  }

  // Returns the "name" link of the chain: remembers the step name in
  // m.next and hands back the setter that expects its handler.
  function get(m) {
    count++;
    return function (s) {
      count++;
      m.next = s;
      return set(m);
    };
  }

  // Returns the "handler" link of the chain: stores a wrapper for `fn`
  // under the pending step name, then hands back the next "name" link.
  function set(m) {
    count++;
    return function (fn) {
      count++;
      if (typeof fn !== 'function') {
        throw new Error('must define a function argument for \'' + m.next + '\'');
      }
      m[m.next] = function () {
        count++;
        fn.apply(m, arguments);
        // always return the entry handler so invocations keep chaining
        return m[m.id];
      };
      delete m.next;
      return get(m);
    };
  }
}());
// Demo macro 'test': the entry handler logs, then delegates to the step
// registered under the typeof name of its argument ('string', 'object' or
// 'function').
(defmacro('test')
  (function test(arg) {
    count++;
    console.log('test');
    // `this` is the macro, which holds every registered step by name
    this[typeof arg](arg);
  })
  ('string')
  (function string(text) {
    count++;
    console.log('string %s', text);
  })
  ('object')
  (function object(value) {
    count++;
    console.log('object %o', value);
  })
  ('function')
  (function fn(callback) {
    count++;
    console.log('function %o', callback);
    // function arguments are executed immediately, without arguments
    callback();
  }));
// Exercise the 'test' macro twice: a string, then an object, then a callback
// that logs the running call count and the milliseconds elapsed since `start`.
(macro('test')
  ('str')
  ({ id: 'object' })
  (function (unused) {
    console.log(count++);
    var elapsed = (new Date()).getTime() - start;
    console.log(elapsed);
  }));

(macro('test')
  ('2')
  ({ id: '2' })
  (function (unused) {
    console.log(count++);
    var elapsed = (new Date()).getTime() - start;
    console.log(elapsed);
  }));