Skip to content

Instantly share code, notes, and snippets.

@hallvors
Last active September 23, 2016 11:53
Show Gist options
  • Save hallvors/c8803a2bf533b5f21f85b20647b86fcb to your computer and use it in GitHub Desktop.
Save hallvors/c8803a2bf533b5f21f85b20647b86fcb to your computer and use it in GitHub Desktop.
Helper script for better Japanese line wrapping, using Kuromoji tokenizer library
// Build a Kuromoji tokenizer configured with the dictionary path below. The
// rest of this script lives inside the build callback, which receives either
// an error or the tokenizer.
kuromoji.builder({ dicPath: "node_modules/kuromoji/dict/" }).build(function (err, tokenizer) {
// A build error is only logged here; it does not by itself stop the callback body from running.
if(err)console.log(err);
// Collects, in document order, every text node below `node` whose content is
// not made up solely of whitespace. Anything that is not such a text node is
// searched recursively (nodes without children simply contribute nothing).
// Based on http://stackoverflow.com/questions/10730309/find-all-text-nodes-in-html-page
//   node    - the root node whose descendants are searched (the root itself is not tested)
//   returns - an array of the matching text nodes
var textNodesUnder = function(node){
  var found = [];
  var child = node.firstChild;
  while (child) {
    var isVisibleText = child.nodeType === 3 && !/^\s+$/.test(child.data);
    if (isVisibleText) {
      found.push(child);
    } else {
      // append the matches from this subtree to the running list
      Array.prototype.push.apply(found, textNodesUnder(child));
    }
    child = child.nextSibling;
  }
  return found;
};
// Wraps each tokenized word of the page text in <span class="avoid-wrap"> so a
// stylesheet can keep the word on one line (e.g. with word-break: keep-all).
//
// The plan:
//   1. take a static list of all text nodes under document.body
//   2. tokenize the text of each node
//   3. split the node at every word boundary and move each word into a SPAN
//
// Relies on `textNodesUnder` and `tokenizer` from the enclosing scope.
// Takes no arguments and returns nothing; its only effect is on the DOM.
function addSpanTags(){
  textNodesUnder(document.body).forEach(function(textnode){
    var parent = textnode.parentNode;
    if(!parent || parent.tagName in {'SCRIPT':1, 'STYLE':1}){
      return; // we would cause really funky effects if we messed with those nodes..
    }
    var text = textnode.data;
    var ranges = []; // [start, end) offsets into `text`, in token order
    tokenizer.tokenize(text).forEach(function(wordData){
      var surface = wordData.surface_form;
      if(surface.length < 2) {
        return; // a 1 char word cannot be broken across lines, nothing to protect
      }
      // word_position is treated as a 1-based offset into the node text
      var start = wordData.word_position - 1;
      var end = start + surface.length;
      // Only trust offsets that really point at this word. An offset outside
      // the text would make splitText() throw and abort all remaining nodes.
      if(start < 0 || text.slice(start, end) !== surface) {
        return;
      }
      ranges.push([start, end]);
    });
    // Walk backwards so that offsets of earlier words stay valid while the
    // tail of the text node is being chopped off.
    for(var i = ranges.length - 1; i >= 0; i--) {
      var from = ranges[i][0];
      var to = ranges[i][1];
      if(to !== textnode.data.length){
        textnode.splitText(to); // chops off the remainder
      }
      var wordNode = textnode.splitText(from); // chops off the string to wrap
      var span = document.createElement('span');
      span.className = 'avoid-wrap';
      wordNode.parentNode.insertBefore(span, wordNode);
      span.appendChild(wordNode);
    }
  });
}
// The tokenizer build has finished. addSpanTags needs a working tokenizer, so
// it is only run when the build reported no error. It runs immediately if the
// page has already loaded, otherwise once the window 'load' event fires.
if (!err) {
  if (document.readyState !== 'complete') {
    window.addEventListener('load', addSpanTags, false);
  } else {
    addSpanTags();
  }
}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment