Skip to content

Instantly share code, notes, and snippets.

@klappy
Last active December 1, 2019 20:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save klappy/447e461d7685f45866ca337c2b1e6e02 to your computer and use it in GitHub Desktop.
Save klappy/447e461d7685f45866ca337c2b1e6e02 to your computer and use it in GitHub Desktop.
Wrap the Unicode word tokens of a sentence in HTML spans, without losing any punctuation.
let XRegExp = require('xregexp');
// Demo: wrap every word of `sentence` in <span>…</span> while keeping all of the
// text between the words (spaces, hyphens, punctuation) in its original place.
// NOTE: the text is concatenated into the markup verbatim; no HTML escaping is applied.

// Separator pattern: one or more consecutive characters that are neither Unicode
// letters (\pL) nor marks (\pM). The quantifier is greedy so that a run such as
// ", " is consumed as ONE separator instead of producing an empty token between
// the comma and the space.
const nonUnicodeLetter = XRegExp('[^\\pL\\pM]+');
const sentence = "This is a sentence-with some punctuation, and it will be split-up.";
console.log("sentence: ", sentence);
// A separator at the very start or end of the sentence still makes split() emit
// an empty string there; drop those so that no empty <span></span> is generated.
const tokens = sentence.split(nonUnicodeLetter).filter(function(token) {
  return token !== '';
});
console.log("tokens: ", tokens);
// Part of the sentence that has not been copied into `response` yet.
let remaining = sentence;
let response = "";
tokens.forEach(function(token) {
  // Group 1 captures the separator text in front of the token, group 2 the token.
  // [\s\S] is used rather than "." so separators containing line breaks also match.
  // Tokens consist solely of characters from the letter/mark class, so they can be
  // embedded in the pattern without escaping.
  const regex = XRegExp('^([\\s\\S]*?)(' + token + ')');
  const match = remaining.match(regex);
  // The pattern is anchored at the start, so dropping the matched length consumes
  // exactly the separator and the token.
  remaining = remaining.slice(match[0].length);
  console.log("token: ", token);
  console.log("match: ", match);
  response = response + match[1] + '<span>' + match[2] + '</span>';
});
// Whatever is left after the last token is trailing punctuation; keep it.
response = "<div>" + response + remaining + "</div>";
console.log("response: ", response);
// Expected output (array formatting may differ slightly between Node versions):
// sentence: This is a sentence-with some punctuation, and it will be split-up.
// tokens: [ 'This',
// 'is',
// 'a',
// 'sentence',
// 'with',
// 'some',
// 'punctuation',
// 'and',
// 'it',
// 'will',
// 'be',
// 'split',
// 'up' ]
// token: This
// match: [ 'This',
// '',
// 'This',
// index: 0,
// input: 'This is a sentence-with some punctuation, and it will be split-up.' ]
// token: is
// match: [ ' is',
// ' ',
// 'is',
// index: 0,
// input: ' is a sentence-with some punctuation, and it will be split-up.' ]
// token: a
// match: [ ' a',
// ' ',
// 'a',
// index: 0,
// input: ' a sentence-with some punctuation, and it will be split-up.' ]
// token: sentence
// match: [ ' sentence',
// ' ',
// 'sentence',
// index: 0,
// input: ' sentence-with some punctuation, and it will be split-up.' ]
// token: with
// match: [ '-with',
// '-',
// 'with',
// index: 0,
// input: '-with some punctuation, and it will be split-up.' ]
// token: some
// match: [ ' some',
// ' ',
// 'some',
// index: 0,
// input: ' some punctuation, and it will be split-up.' ]
// token: punctuation
// match: [ ' punctuation',
// ' ',
// 'punctuation',
// index: 0,
// input: ' punctuation, and it will be split-up.' ]
// token: and
// match: [ ', and',
// ', ',
// 'and',
// index: 0,
// input: ', and it will be split-up.' ]
// token: it
// match: [ ' it', ' ', 'it', index: 0, input: ' it will be split-up.' ]
// token: will
// match: [ ' will', ' ', 'will', index: 0, input: ' will be split-up.' ]
// token: be
// match: [ ' be', ' ', 'be', index: 0, input: ' be split-up.' ]
// token: split
// match: [ ' split', ' ', 'split', index: 0, input: ' split-up.' ]
// token: up
// match: [ '-up', '-', 'up', index: 0, input: '-up.' ]
// response: <div><span>This</span> <span>is</span> <span>a</span> <span>sentence</span>-<span>with</span> <span>some</span> <span>punctuation</span>, <span>and</span> <span>it</span> <span>will</span> <span>be</span> <span>split</span>-<span>up</span>.</div>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment