Last active
August 6, 2019 12:08
-
-
Save karanlyons/6e65b19fadaab0b28bd5d2e7137919a9 to your computer and use it in GitHub Desktop.
Add translator-friendly markup to translatable strings.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Map from a tag name to the function that renders the text enclosed by that tag. */
export type Formatters = { [k: string]: (s: string) => string };

/**
 * Error carrying the context of a failed formatting attempt.
 *
 * Besides the human-readable `message`, it exposes the input string, the
 * formatter map that was in use, and the tag name the failure relates to.
 */
export class FormatError extends Error {
  /**
   * @param message - Description of what went wrong.
   * @param str - The complete input string that was being formatted.
   * @param formatters - The formatter map supplied by the caller.
   * @param tag - Name of the tag the error refers to.
   */
  constructor(
    public message: string,
    public str: string,
    public formatters: Formatters,
    public tag: string
  ) {
    // Hand the message to Error so engines that build the stack header at
    // construction time (e.g. V8) include it in `stack`.
    super(message);
    // When compiled to ES5, `super()` returns a plain Error and the subclass
    // prototype is lost; restoring it keeps `instanceof FormatError` working.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "FormatError";
  }
}
// Matches a single opening or closing x:-namespaced tag, e.g. <x:foo> or </x:foo>.
// A tag name starts with a letter or underscore, followed by word characters,
// dots or hyphens (the hyphen is placed last in the class so it is
// unambiguously a literal).
const tagRe = /(?:<\/?x:[A-Za-z_][\w.-]*>)/;
// Same tag pattern, capturing (1) the optional closing slash and (2) the tag name.
const tagGroupsRe = /(?:<(\/?)x:([A-Za-z_][\w.-]*)>)/;
// Global tokenizer: each match is either one tag (groups 1 and 2 are set) or
// the longest run of characters that does not start a tag (groups unset).
// The text run uses [\s\S] instead of `.` because `.` does not match line
// terminators; a global exec() would then skip over them and they would be
// silently dropped from the token stream.
const tokenizerRe = new RegExp(
  tagGroupsRe.source + "|" + "(?:(?!" + tagRe.source + ")[\\s\\S])+",
  "g"
);
/**
 * Takes a string with x:namespaced "XML-ish" tags (i.e. `<x:foo>bar</x:foo>`)
 * and a set of formatters for those tags and returns a new, formatted string.
 *
 * This can be used to provide translatable strings with applied markup
 * without leaking the specifics of that markup to the translator. As this is
 * meant for use alongside, say, gettext or ICU, self-closing tags and
 * attributes are both unsupported.
 *
 * @example
 * format(
 *   "Visit <x:link><x:em-tag>our</x:em-tag> <u>website</u></x:link>.",
 *   {
 *     'link': (s) => `<a href="https://example.com">${s}</a>`,
 *     'em-tag': (s) => `<em>${s}</em>`,
 *   },
 * ) === "Visit <a href=\"https://example.com\"><em>our</em> <u>website</u></a>."
 *
 * @param str - The string to format.
 * @param formatters - Map from tag name (without the `x:` prefix) to the
 *   function that renders that tag's content. Only the map's own properties
 *   are consulted, so a tag named e.g. `toString` never resolves to a member
 *   inherited from `Object.prototype`.
 * @returns The string with every x: tag pair replaced by its formatter output.
 * @throws {FormatError} If a closing tag has no open tag to match, if a tag
 *   is closed out of order or never closed, or if a correctly closed tag has
 *   no formatter function.
 */
export function format(str: string, formatters: Formatters = {}): string {
  // One frame per currently open tag: its name plus the buffer that was being
  // filled when it opened (where the formatted result goes on close).
  const openFrames: { tag: string; outer: string[] }[] = [];
  // Buffer collecting output for the innermost open tag (or the top level).
  let current: string[] = [];
  // The tokenizer regex is shared and stateful (global flag). Keeping our own
  // cursor and re-seeding lastIndex before every exec() keeps this loop
  // correct even if a formatter calls format() again while we are mid-scan.
  let cursor = 0;

  for (;;) {
    tokenizerRe.lastIndex = cursor;
    const match = tokenizerRe.exec(str);
    if (match === null) {
      break;
    }
    cursor = tokenizerRe.lastIndex;

    const [token, endingSlash, tag] = match;

    // Plain text: the tag groups did not participate in the match.
    if (tag === undefined) {
      current.push(token);
      continue;
    }

    // Opening tag: start a fresh buffer for its content.
    if (endingSlash === "") {
      openFrames.push({ tag, outer: current });
      current = [];
      continue;
    }

    // Closing tag from here on.
    const frame = openFrames[openFrames.length - 1];
    if (frame === undefined) {
      throw new FormatError(
        `Missing matching start tag for </x:${tag}>`,
        str,
        formatters,
        tag
      );
    }
    if (frame.tag !== tag) {
      // Closed out of order: the innermost open tag is the one left unclosed.
      throw new FormatError(
        `Missing matching end tag for <x:${frame.tag}>`,
        str,
        formatters,
        frame.tag
      );
    }

    const formatter = Object.prototype.hasOwnProperty.call(formatters, tag)
      ? formatters[tag]
      : undefined;
    if (typeof formatter !== "function") {
      throw new FormatError(
        `No formatter for tag name '${tag}'`,
        str,
        formatters,
        tag
      );
    }

    openFrames.pop();
    // Invoke with the map as `this`, matching a `formatters[tag](...)` call.
    frame.outer.push(formatter.call(formatters, current.join("")));
    current = frame.outer;
  }

  const unclosed = openFrames[openFrames.length - 1];
  if (unclosed !== undefined) {
    throw new FormatError(
      `Missing matching end tag for <x:${unclosed.tag}>`,
      str,
      formatters,
      unclosed.tag
    );
  }

  return current.join("");
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Tests: | |
P: Some <b>bold</b> <a href="">nested <strong>elements</strong> with</a> extra <strong>cruft</strong>. | |
P: No tags. | |
P: Empty tag. | |
P: Reused<strong><strong>tags.</strong></strong> | |
P: Visit <a href="http://example.com"><em>our</em> <i>website</i></a>. | |
P: FormatError: Missing matching end tag for <x:c> | |
P: FormatError: No formatter for tag name 'c' | |
P: FormatError: Missing matching end tag for <x:a> | |
P: FormatError: Missing matching start tag for </x:a> | |
Performance: | |
Run 1 of 5; 90,000 trials: 399.044ms | |
Run 2 of 5; 90,000 trials: 388.204ms | |
Run 3 of 5; 90,000 trials: 411.119ms | |
Run 4 of 5; 90,000 trials: 412.003ms | |
Run 5 of 5; 90,000 trials: 409.765ms |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * One table-driven case: the input string, the formatter map to apply, and
 * the expected outcome (either the formatted string or the
 * "ErrorName: message" rendering of the error that should be thrown).
 */
type FormatCase = [string, Formatters, string];

const tests: FormatCase[] = [
  // Ordinary HTML is left alone; nested and repeated x: tags are formatted.
  [
    "Some <b>bold</b> <x:a>nested <x:b>elements</x:b> with</x:a> extra <x:b>cruft</x:b>.",
    {
      a: content => `<a href="">${content}</a>`,
      b: content => `<strong>${content}</strong>`
    },
    'Some <b>bold</b> <a href="">nested <strong>elements</strong> with</a> extra <strong>cruft</strong>.'
  ],

  // Input without any x: tags comes back unchanged.
  ["No tags.", {}, "No tags."],

  // A tag with no content still invokes its formatter.
  ["<x:empty-tag></x:empty-tag>", { "empty-tag": () => "Empty tag." }, "Empty tag."],

  // The same tag may be nested inside itself.
  [
    "Reused<x:b><x:b>tags.</x:b></x:b>",
    { b: content => `<strong>${content}</strong>` },
    "Reused<strong><strong>tags.</strong></strong>"
  ],

  // Hyphenated tag names alongside plain HTML.
  [
    "Visit <x:link><x:em-tag>our</x:em-tag> <i>website</i></x:link>.",
    {
      link: content => `<a href="http://example.com">${content}</a>`,
      "em-tag": content => `<em>${content}</em>`
    },
    'Visit <a href="http://example.com"><em>our</em> <i>website</i></a>.'
  ],

  // Tags closed in the wrong order.
  [
    "<x:a><x:b><x:c>Mismatched tags.</x:a></x:b></x:c>",
    {},
    "FormatError: Missing matching end tag for <x:c>"
  ],

  // Well-formed tags, but no formatter is registered for them.
  [
    "<x:a><x:b><x:c>Missing formatters.</x:c></x:b></x:a>",
    {},
    "FormatError: No formatter for tag name 'c'"
  ],

  // Opening tag that is never closed.
  [
    "<x:a>Missing end tag.",
    {},
    "FormatError: Missing matching end tag for <x:a>"
  ],

  // Closing tag that was never opened.
  [
    "Missing start tag.</x:a>",
    {},
    "FormatError: Missing matching start tag for </x:a>"
  ]
];
// Correctness pass: run every case and print "P: <result>" when the outcome
// matches the expectation, or "F:" followed by expected and actual otherwise.
console.group("Tests:");
for (const [str, formatters, expected] of tests) {
  let result: string;
  try {
    result = format(str, formatters);
  } catch (err) {
    // Failure cases are compared by their "Name: message" rendering. Narrow
    // first: a caught value is not guaranteed to be an Error instance.
    result = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
  }
  if (result === expected) {
    console.log(`P: ${result}`);
  } else {
    console.error(`F:\n ${expected}\n ${result}`);
  }
}
console.groupEnd();
// Benchmark pass: time repeated formatting of the whole test table.
const perfRuns = 5; // number of timed runs
const perfIterations = 10000; // passes over the test table per run

console.group("Performance:");
for (let run = 1; run <= perfRuns; run++) {
  const trialCount = (perfIterations * tests.length).toLocaleString('en-US');
  const title = `Run ${run} of ${perfRuns}; ${trialCount} trials`;
  console.time(title);
  for (let pass = 0; pass < perfIterations; pass++) {
    for (const [str, formatters] of tests) {
      try {
        format(str, formatters);
      } catch {
        // Intentionally ignored: some cases are expected to throw, and only
        // the elapsed time matters here.
      }
    }
  }
  console.timeEnd(title);
}
console.groupEnd();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment