Skip to content

Instantly share code, notes, and snippets.

@dtinth dtinth/cut.js
Created Jun 28, 2019

Embed
What would you like to do?
Thai word cut in Chrome
// Splits text into word segments using the engine's word-break rules for the
// Thai ("th") locale.
//
// Prefers the standard Intl.Segmenter API. Engines that predate it fall back
// to the non-standard V8 break iterator:
// https://code.google.com/archive/p/v8-i18n/wikis/BreakIterator.wiki
//
/**
 * @param {string} text - Text to segment.
 * @returns {string[]} Consecutive slices of `text`, in order. Every boundary
 *   the segmenter reports produces a slice, so whitespace and punctuation
 *   come back as their own entries. An empty string yields an empty array.
 * @throws {TypeError} When neither Intl.Segmenter nor Intl.v8BreakIterator
 *   is available in the running engine.
 */
function cut(text) {
  if (typeof Intl.Segmenter === 'function') {
    const segmenter = new Intl.Segmenter('th', { granularity: 'word' });
    return Array.from(segmenter.segment(text), ({ segment }) => segment);
  }

  if (typeof Intl.v8BreakIterator !== 'function') {
    throw new TypeError(
      'cut() requires Intl.Segmenter or Intl.v8BreakIterator, but neither is available',
    );
  }

  // Legacy path: walk the break positions and slice between neighbours.
  const iterator = new Intl.v8BreakIterator(['th']);
  iterator.adoptText(text);
  const result = [];
  let pos = iterator.first();
  // next() reports -1 once the end of the text has been passed.
  for (let nextPos = iterator.next(); nextPos !== -1; nextPos = iterator.next()) {
    result.push(text.slice(pos, nextPos));
    pos = nextPos;
  }
  return result;
}
// Spec: a run of Thai text written without spaces is split into its words.
// Assumes the test runner supplies Jest-style `it` and `expect` globals.
it('cuts word', () => {
  const input = 'ตัดคำภาษาไทย';
  const expectedWords = ['ตัด', 'คำ', 'ภาษา', 'ไทย'];

  expect(cut(input)).toEqual(expectedWords);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.