Created
January 31, 2019 10:07
-
-
Save loretoparisi/9526aaa48acc6f668567bc4f4bba0914 to your computer and use it in GitHub Desktop.
Replace Tokens with Word Boundary in a Text in Unicode and ASCII
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Replace whole-word occurrences of the given words in a text.
 *
 * `tokens` and `words` are parallel arrays: for each position `i`, the next
 * not-yet-replaced occurrence of `words[i]` is replaced by the template
 * `expr[tokens[i]]`, in which the first literal `$1` stands for the word.
 * Listing the same word several times replaces successive occurrences of it.
 *
 * Word boundaries are detected in one of two ways:
 *  - ASCII mode (`isUnicode` falsy): the regex `\b` assertion.
 *  - Unicode mode (`isUnicode` truthy): the word must not be preceded or
 *    followed by a non-whitespace character (`(?<!\S)` / `(?!\S)`), because
 *    `\b` only understands ASCII word characters.
 *
 * Entries whose word is not a non-empty string, or whose template is not a
 * string, are skipped.
 *
 * @param {boolean} isUnicode true to use the whitespace-based boundary regex
 * @param {string} text input text
 * @param {Array<string|number>} tokens keys into `expr`, one per word
 * @param {string[]} words words to replace, parallel to `tokens`
 * @param {Object<string, string>|string[]} expr replacement templates
 * @returns {string} the text with the replacements applied
 */
var replaceTokens = function (isUnicode, text, tokens, words, expr) {
  // word -> index (in the current text) of the last character of the most
  // recent replacement made for that word; later matches must start after it.
  const seen = new Map();
  const buildPattern = isUnicode
    ? (escaped) => `(?<!\\S)${escaped}(?!\\S)`
    : (escaped) => `\\b(${escaped})\\b`;
  let result = text;

  tokens.forEach((token, index) => {
    const word = words[index];
    const template = expr[token];
    // An empty word would yield a zero-width regex; a missing template
    // cannot be expanded. Skip both instead of corrupting the text/throwing.
    if (typeof word !== 'string' || word.length === 0 || typeof template !== 'string') {
      return;
    }

    const escaped = word.replace(/[\-\[\]{}()*+?.,\\\^$|#\s]/g, '\\$&');
    const wordRegex = new RegExp(buildPattern(escaped), 'g');
    const lastEnd = seen.has(word) ? seen.get(word) : -1;

    let match = wordRegex.exec(result);
    while (match !== null && match.index <= lastEnd) {
      match = wordRegex.exec(result);
    }
    if (match === null) {
      return;
    }

    // Function replacer: insert the raw word verbatim (not its regex-escaped
    // form) and keep `$&`, `$$`, ... inside the word from being interpreted.
    const replacement = template.replace('$1', () => word);
    const matchEnd = match.index + match[0].length;
    const delta = replacement.length - match[0].length;
    result = result.slice(0, match.index) + replacement + result.slice(matchEnd);

    // Text after the match moved by `delta`; keep recorded offsets in sync.
    seen.forEach((end, key) => {
      if (end >= match.index) {
        seen.set(key, end + delta);
      }
    });
    // Record the end of the inserted text so that an occurrence of the word
    // inside its own replacement is never matched again.
    seen.set(word, match.index + replacement.length - 1);
  });

  return result;
}; // replaceTokens
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment