Skip to content

Instantly share code, notes, and snippets.

@adriengibrat
Last active May 30, 2017 08:14
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adriengibrat/817140a89cfd4893b4155a2ac913904d to your computer and use it in GitHub Desktop.
Save adriengibrat/817140a89cfd4893b4155a2ac913904d to your computer and use it in GitHub Desktop.
simple CLDR plural rules parser
#!/usr/bin/env node
/**
* plural.js – simple CLDR plural rules parser
* https://gist.github.com/adriengibrat/817140a89cfd4893b4155a2ac913904d
*
* This program is free software. It comes without any warranty.
* Released under the WTFPL license – http://www.wtfpl.net
*
* Usage:
# default amd & global names are 'plurals' / 'ordinals', depending on the data provided
# exports all languages by default
./plural.js [amd & global name] [languages subset] < input.json > output.js
# when no input provided, outputs parser source
# default amd & global name is 'cldr'
./plural.js [amd & global name] > parser.js
* Examples:
* 0. get CLDR data
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/plurals.json > /tmp/plurals.json
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/ordinals.json > /tmp/ordinals.json
* 1. parse data to js
./plural.js < /tmp/plurals.json > plurals.js
./plural.js < /tmp/ordinals.json > ordinals.js
* 1.1 with custom amd & global name
./plural.js myplurals < /tmp/plurals.json > myplurals.js
./plural.js myordinals < /tmp/ordinals.json > myordinals.js
* 1.2 select exported language(s)
./plural.js pluralsUsa en es < /tmp/plurals.json > plurals-usa.js
./plural.js ordinalsUsa en es < /tmp/ordinals.json > ordinals-usa.js
* 2. get rule parser source
./plural.js > cldr.js
* 2.1 with custom amd & global name
./plural.js mycldr > mycldr.js
* 3. optionally, install uglifyjs
npm i -g uglify-js
* 3.1 pipe the output
./plural.js < /tmp/plurals.json | uglifyjs --compress --mangle - > plurals-all.js
* 3.2 make daddy proud, write crazy one liners
curl https://raw.githubusercontent.com/unicode-cldr/cldr-core/master/supplemental/plurals.json | ./plural.js | uglifyjs --compress --mangle - > plurals.js
*/
// Command line entry point.
// With data piped on stdin: parses the CLDR JSON, compiles the plural rules
// (optionally restricted to the languages given as arguments) and prints the
// resulting UMD module on stdout.
// Without piped data (stdin is a TTY): prints the parser source itself.
var stdin = process.stdin
var stdout = process.stdout
var stderr = process.stderr
var args = process.argv.slice(2)
var input = ''
// report a failure on stderr and make the process exit with a non-zero status,
// so that shell pipelines can detect that nothing usable was written
function fail (message) {
	process.exitCode = 1
	console.error(message)
}
stdin.setEncoding('utf8')
stdin.on('data', function (chunk) { input += chunk })
stdin.on('end', function () {
	try { // parse input
		var data = JSON.parse(input)
	}
	catch (error) { return fail('invalid JSON data\n') }
	try { // find rules
		var cardinal = data.supplemental['plurals-type-cardinal']
		var ordinal = data.supplemental['plurals-type-ordinal']
		var rules = cardinal || ordinal
		if (!rules)
			return fail('no cardinal or ordinal rules found\n')
	}
	catch (error) { return fail('input must be CLDR data\n') }
	try { // compile and write out
		// shift() removes the first argument (the amd & global name) so that only
		// language codes remain in args; unshift() would return the array length
		var name = args.shift() || cardinal && 'plurals' || ordinal && 'ordinals'
		var subset = args.length ? pick.bind(null, args) : identity
		var selected = subset(rules)
		if (!Object.keys(selected).length)
			return fail('no plural rules found' + (args.length ? ' for ' + args.join(', ') : '') + '\n')
		var compile = langs(cldr())
		var plurals = compile(selected)
		console.log(clean(source(name, plurals.factory())))
	}
	catch (error) { return fail(error.message) }
})
if (stdin.isTTY) { // outputs cldr source when no stdin
	var name = args.shift() || 'cldr'
	console.log(source(name, name === 'gettext' ? gettext : cldr))
	process.exit()
}
// Factories
// Factory of the single language plural ruleset parser.
// The returned function takes a CLDR ruleset (hash of 'pluralRule-count-<type>'
// keys to rule strings) and returns a compiled function (n) -> plural type,
// carrying the list of its possible types in its `types` property.
// It throws when the compiled function disagrees with the samples of the rules.
// Everything is kept inside the closure so the function can be emitted as source.
function cldr () { // single language plural ruleset parser
	// operand definitions, ordered so that each one only relies on those listed before it
	var OPERANDS = [
		'b = (n + ".").split(".")' // array, integer digits & fractional digits in n
		, 'f = b[1]' // string, fractional digits in n
		, 'i = b[0]' // string, integer digits of n
		, 'j = Number(i) == n' // boolean, n is an integer
		, 't = f.replace(/0+$/, "")' // string, fractional digits in n without trailing zeros
		, 'v = f.length' // integer, number of fraction digits in n
	]
	// position of each plural type in the generated function body
	var RANKS = {zero: 0, one: 1, two: 2, few: 3, many: 4, other: 5}
	function Vars (operands, sort) { // keeps track of the variables used in the generated code
		this.declarations = {}
		this.lookups = (operands || []).map(lookup).reverse() // dependents first, dependencies get found afterwards
		this.sort = sort
	}
	Vars.prototype.toString = function toString () { // renders the var declarations block
		var lines = map(function (declaration) { return declaration }, this.declarations, this.sort)
		if (!lines.length)
			return ''
		return 'var ' + lines.join('\n\t, ') + '\n'
	}
	Vars.prototype.parse = function parse (source) { // declares every operand found in source
		var self = this
		for (var index = 0; index < self.lookups.length; index++) {
			var candidate = self.lookups[index]
			// already declared variables are searched too, as they may rely on this operand
			if (candidate.pattern.test(self + source))
				self.declarations[candidate.name] = candidate.operand
		}
		return self
	}
	return function cldr (ruleset) {
		var vars = new Vars(OPERANDS, shortestFirst)
		var rules = map(function (text, key) { return rule(vars, text, key) }, ruleset)
		rules.sort(by('type', RANKS))
		return compile(rules, vars)
	}
	// Vars helpers
	function shortestFirst (a, b) { // sort variable names by length, then alphabetically
		return a.length - b.length || (a < b ? -1 : 1)
	}
	function lookup (operand) { // describe how to spot an operand (variable name) in source
		var name = /^\w+/.exec(operand).pop()
		return {name: name, operand: operand, pattern: new RegExp('\\b' + name + '\\b')}
	}
	// Utils
	function map (mapper, object, order) { // map object values, keys optionally sorted by given order
		var keys = Object.keys(object).sort(order || function () { return 0 })
		var mapped = []
		for (var index = 0; index < keys.length; index++)
			mapped.push(mapper(object[keys[index]], keys[index]))
		return mapped
	}
	function by (prop, order) { // create comparator sorting objects by prop, ranked by the order hash
		return function (a, b) {
			if (order[a[prop]] < order[b[prop]])
				return -1
			return 1
		}
	}
	// Plural compile helpers
	function rule (vars, text, name) { // build rule definition object
		var chunks = text.trim().split(/\s*@\w*/) // condition first, then one chunk per samples list
		var condition = parse(vars, chunks[0])
		var type = name.replace('pluralRule-count-', '')
		var guard = condition ? 'if (' + condition + ')\n\t' : ''
		var samples = chunks.slice(1).join(' ').split(/[ ,~…]+/).filter(Boolean)
		return {source: guard + 'return "' + type + '"', type: type, test: samples}
	}
	function parse (vars, source) { // convert plural rule to js code
		var AND = ' && '
		var OR = ' || '
		var EQ = ' == '
		var INT = 'j && '
		function integer (operand) { return operand === 'n' ? INT : '' } // n needs an integer guard
		function modulo (_, operand, divisor) { // declares a variable holding the remainder
			var name = operand + divisor
			vars.declarations[name] = name + ' = ' + (operand === 'n' ? 'i' : operand) + ' % ' + divisor
			return integer(operand) + name
		}
		function list (_, expr, not, values) { // expands a list to one comparison per item
			var glue = (not ? AND : OR) + expr
			return '(' + expr + values.split(',').join(glue) + ')'
		}
		function range (_, operand, not, from, to) { // turns a range into boundary comparisons
			if (not)
				return '(' + operand + ' < ' + from + OR + operand + ' > ' + to + ')'
			return integer(operand) + operand + ' >= ' + from + AND + operand + ' <= ' + to
		}
		// shamelessly borrowed from https://github.com/eemeli/make-plural.js
		var code = source.replace(/([fin]) % (\d+)/g, modulo)
		code = code.replace(/(\w+ (!?)= )([0-9.]+,[0-9.,]+)/g, list)
		code = code.replace(/(\w+) (!?)= ([0-9]+)\.\.([0-9]+)/g, range)
		code = code.replace(/ and /g, AND)
		code = code.replace(/ or /g, OR)
		return code.replace(/ = /g, EQ)
	}
	function compile (rules, vars) { // compile plural function and returns it if tests run OK
		var sources = []
		var types = []
		rules.forEach(function (rule) {
			sources.push(rule.source)
			types.push(rule.type)
		})
		var body = sources.join('\n')
		var declarations = String(vars.parse(body))
		var fn = new Function('n', '\t' + (declarations + body).replace(/\n/g, '\n\t'))
		fn.types = types
		rules.forEach(function (rule) { test(fn, rule.type, rule.test) })
		return fn
	}
	function test (fn, expected, values) { // check that function returns expected type for all given values
		for (var index = 0; index < values.length; index++) {
			var n = values[index]
			var result = fn(n)
			if (result !== expected)
				throw Error('n = ' + n + ' -> ' + result + ', expected ' + expected)
		}
	}
}
// Factory of the batch parser.
// Takes the compile function used for a single language ruleset and returns a
// function turning a dictionary (lang -> ruleset) into a hash (lang -> compiled
// plural function). The hash inherits a `factory` method that builds a function
// recreating the whole hash, with identical plural functions and types lists
// declared only once.
// A failing compilation is rethrown with the name of the offending language.
function langs (compile) { // langs batch rules parser
	return function langs (dictionary) {
		// factory lives on the prototype, so it is never listed among the languages
		return reduce(build, Object.create({factory: factory}), dictionary)
	}
	// Utils
	function reduce (reducer, initial, object) { // reduce object, fp style
		return Object.keys(object)
			.reduce(function (acc, key) { return reducer(acc, object[key], key) } , initial)
	}
	function variable (index) { // generate variable names: 'a', 'b', ..., 'z', 'a1', 'b1', etc.
		return String.fromCharCode(index % 26 + 97)+ (index / 26 | 0 || '')
	}
	function indent (source) { return String(source).replace(/\n/g, '\n\t') } // indent code
	// Langs parser helpers
	function build (langs, rules, lang) { // build langs plural hash
		try { langs[lang] = compile(rules) }
		catch (error) { throw Error('compile ' + lang + ' plural failed (' + error.message + ')') }
		return langs
	}
	function factory () { // compile factory of langs plural hash
		var dedupes = reduce(dedupe, {fns: {}, types: {}}, this)
		var build = source.bind(dedupes.types)
		var sources = reduce(build, {refs: [], types: [], props: []}, dedupes.fns)
		var LF = '\n', LFC = LF + ', '
		var lines = [
			'function types (fn, types) { fn.types = types.slice() }'
			, sources.types.join(LF)
			, 'return {' + indent(LF + sources.props.join(LFC))
			, '}'
		]
		// an empty hash has nothing to declare, and a bare `var` would be a syntax error
		if (sources.refs.length)
			lines.unshift('var ' + indent(sources.refs.join(LFC)))
		return new Function('', '\t' + indent(lines.join(LF)))
	}
	function dedupe (dedupe, fn, lang) { // dedupe plural fn definitions and types
		var fns = dedupe.fns
		// functions are keyed by their source code, types lists by their comma joined items
		fns[fn] = { langs: fns[fn] ? fns[fn].langs.concat(lang) : [lang], fn: fn }
		dedupe.types[fn.types] = { list: fn.types }
		return dedupe
	}
	function source (source, dedupe) { // build source parts from deduped fn definitions and types
		var types = this[dedupe.fn.types]
		var name = variable(source.refs.length)
		source.refs.push(name + ' = ' + dedupe.fn)
		if (!types.name) { // first use of this types list: declare it
			types.name = variable(source.refs.length)
			source.refs.push(types.name + ' = ' + JSON.stringify(types.list))
		}
		source.types.push('types(' + name + ', ' + types.name + ')')
		dedupe.langs.forEach(function (lang) { source.props.push('"' + lang + '": ' + name) })
		return source
	}
}
// Utils
// Copies to a new object the properties of object whose key is listed in keys.
// Listed keys that object does not own are ignored.
function pick (keys, object) { // pick keys in given object
	var picked = {}
	Object.keys(object).forEach(function (key) {
		if (keys.indexOf(key) !== -1)
			picked[key] = object[key]
	})
	return picked
}
// Returns its argument untouched, used when no languages subset is requested.
function identity (a) {
	return a
}
// Source format helpers
// Tidies the source code of functions created with new Function:
// drops the 'anonymous' function name and the empty /**/ comments,
// along with the whitespace preceding them.
function clean (source) { // cleanup source code generated by new Function
	var text = String(source)
	var unnamed = text.replace(/(\bfunction )(anonymous)?/g, '$1')
	return unnamed.replace(/\s*\/\*\*\//g, '')
}
// Small UMD loader, registers the value returned by factory under the given name:
// as an AMD module when an AMD `define` is available, as `module.exports` when
// `exports` is an object, as a property of root otherwise.
// Its source is embedded as is in the generated code (see source), so it must
// stay self-contained.
function umd (root, name, factory) { // small UMD loader
	if (typeof define === 'function' && define.amd) {
		// hand over the factory itself: AMD loaders execute a function passed as factory,
		// passing factory() would run a function module (the parser) instead of exporting it
		define(name, [], factory)
	} else if (typeof exports === 'object') {
		module.exports = factory()
	} else {
		root[name] = factory()
	}
}
// Formats the source code of a module: the umd loader, immediately invoked
// with `this`, the module name and the given factory.
// The name is serialized with JSON.stringify, so that quotes or backslashes
// it may contain cannot break the generated code.
function source (name, factory) { // format source with UMD loader
	return '('+ umd + ')(this, ' + JSON.stringify(String(name)) + ', ' + factory + ');'
}
// Easter egg ;)
// Factory of the gettext plural forms parser.
// The returned function takes a Plural-Forms definition, such as
// 'nplurals=2; plural=(n != 1);', and returns a function (n) -> plural index.
// It throws when the definition cannot be parsed or when the plural expression
// holds anything but n, digits, spaces and the operators ! = < > ( ) % | & ? :
// The compiled function throws when the index is negative or not below nplurals.
function gettext () {
	return function gettext (rule) {
		// the trailing semicolon of the usual header syntax is not part of the expression
		var expr = parse(/\bplural\s*=\s*(.+?)\s*;?\s*$/, rule)
		var n = parse(/\bnplurals\s*=\s*(\d+)\b/, rule)
		if (/[^n!=<>()%|&?:\s\d]/.test(expr))
			throw Error('unsafe char in plural expression: ' + expr)
		return new Function('n', '\t' + [
			// Number() maps booleans to 0 / 1 like C does, parseInt(true) would be NaN
			'var plural = Number(' + expr + ')'
			, 'if (plural < 0 || plural >= ' + n + ')'
			, '\tthrow Error("invalid plural: " + plural)'
			, 'return plural'].join('\n\t')
		)
	}
	// Parse helper
	function parse (pattern, string) { // returns the last captured group, throws when pattern does not match
		try { return pattern.exec(string).pop() }
		catch (e) { throw Error('unable to parse: ' + string) }
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment