Skip to content

Instantly share code, notes, and snippets.

@therobinkim
Created December 24, 2018 00:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save therobinkim/21444d5a4cd89266ae0e6e16bced7764 to your computer and use it in GitHub Desktop.
Save therobinkim/21444d5a4cd89266ae0e6e16bced7764 to your computer and use it in GitHub Desktop.
htmldiff-js
(function webpackUniversalModuleDefinition(root, factory) {
if(typeof exports === 'object' && typeof module === 'object')
module.exports = factory();
else if(typeof define === 'function' && define.amd)
define([], factory);
else if(typeof exports === 'object')
exports["HtmlDiff"] = factory();
else
root["HtmlDiff"] = factory();
})(window, function() {
return /******/ (function(modules) { // webpackBootstrap
/******/ // The module cache
/******/ var installedModules = {};
/******/
/******/ // The require function
/******/ function __webpack_require__(moduleId) {
/******/
/******/ // Check if module is in cache
/******/ if(installedModules[moduleId]) {
/******/ return installedModules[moduleId].exports;
/******/ }
/******/ // Create a new module (and put it into the cache)
/******/ var module = installedModules[moduleId] = {
/******/ i: moduleId,
/******/ l: false,
/******/ exports: {}
/******/ };
/******/
/******/ // Execute the module function
/******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
/******/
/******/ // Flag the module as loaded
/******/ module.l = true;
/******/
/******/ // Return the exports of the module
/******/ return module.exports;
/******/ }
/******/
/******/
/******/ // expose the modules object (__webpack_modules__)
/******/ __webpack_require__.m = modules;
/******/
/******/ // expose the module cache
/******/ __webpack_require__.c = installedModules;
/******/
/******/ // define getter function for harmony exports
/******/ __webpack_require__.d = function(exports, name, getter) {
/******/ if(!__webpack_require__.o(exports, name)) {
/******/ Object.defineProperty(exports, name, {
/******/ configurable: false,
/******/ enumerable: true,
/******/ get: getter
/******/ });
/******/ }
/******/ };
/******/
/******/ // define __esModule on exports
/******/ __webpack_require__.r = function(exports) {
/******/ Object.defineProperty(exports, '__esModule', { value: true });
/******/ };
/******/
/******/ // getDefaultExport function for compatibility with non-harmony modules
/******/ __webpack_require__.n = function(module) {
/******/ var getter = module && module.__esModule ?
/******/ function getDefault() { return module['default']; } :
/******/ function getModuleExports() { return module; };
/******/ __webpack_require__.d(getter, 'a', getter);
/******/ return getter;
/******/ };
/******/
/******/ // Object.prototype.hasOwnProperty.call
/******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };
/******/
/******/ // __webpack_public_path__
/******/ __webpack_require__.p = "/dist/";
/******/
/******/
/******/ // Load entry module and return exports
/******/ return __webpack_require__(__webpack_require__.s = 1);
/******/ })
/************************************************************************/
/******/ ([
/* 0 */
/***/ (function(module, __webpack_exports__, __webpack_require__) {
"use strict";
__webpack_require__.r(__webpack_exports__);
// CONCATENATED MODULE: ./src/Action.js
const Action = {
equal: 0,
delete: 1,
insert: 2,
none: 3,
replace: 4
};
/* harmony default export */ var src_Action = (Action);
// CONCATENATED MODULE: ./src/Match.js
class Match {
constructor(startInOld, startInNew, size) {
this.startInOld = startInOld;
this.startInNew = startInNew;
this.size = size;
}
get endInOld() {
return this.startInOld + this.size;
}
get endInNew() {
return this.startInNew + this.size;
}
};
// CONCATENATED MODULE: ./src/MatchOptions.js
class MatchOptions {
constructor() {
this.blockSize = 0;
this.repeatingWordsAccuracy = 0.0;
this.ignoreWhitespaceDifferences = false;
}
}
// CONCATENATED MODULE: ./src/Utils.js
const tagRegex = /^\s*<\/?[^>]+>\s*$/;
const tagWordRegex = /<[^\s>]+/;
const whitespaceRegex = /^(\s|&nbsp;)+$/;
const wordRegex = /[\w\#@]+/;
const specialCaseWordTags = ['<img'];
function isTag(item) {
if (specialCaseWordTags.some(re => item !== null && item.startsWith(re))) {
return false;
}
return tagRegex.test(item);
}
function stripTagAttributes(word) {
let tag = tagWordRegex.exec(word)[0];
word = tag + (word.endsWith("/>") ? "/>" : ">");
return word;
}
function wrapText(text, tagName, cssClass) {
return ['<', tagName, ' class="', cssClass, '">', text, '</', tagName, '>'].join('');
}
function isStartOfTag(val) {
return val === '<';
}
function isEndOfTag(val) {
return val === '>';
}
function isStartOfEntity(val) {
return val === '&';
}
function isEndOfEntity(val) {
return val === ';';
}
function isWhiteSpace(value) {
return whitespaceRegex.test(value);
}
function stripAnyAttributes(word) {
if (isTag(word)) {
return stripTagAttributes(word);
}
return word;
}
function isWord(text) {
return wordRegex.test(text);
}
// CONCATENATED MODULE: ./src/MatchFinder.js
function MatchFinder_putNewWord(block, word, blockSize) {
block.push(word);
if (block.length > blockSize) {
block.shift();
}
if (block.length !== blockSize) {
return null;
}
return block.join('');
}
// Finds the longest match in given texts. It uses indexing with fixed granularity that is used to compare blocks of text.
class MatchFinder_MatchFinder {
constructor(oldWords, newWords, startInOld, endInOld, startInNew, endInNew, options) {
this.oldWords = oldWords;
this.newWords = newWords;
this.startInOld = startInOld;
this.endInOld = endInOld;
this.startInNew = startInNew;
this.endInNew = endInNew;
this.options = options;
}
indexNewWords() {
this.wordIndices = new Map();
let block = [];
for (let i = this.startInNew; i < this.endInNew; i++) {
// if word is a tag, we should ignore attributes as attribute changes are not supported (yet)
let word = this.normalizeForIndex(this.newWords[i]);
let key = MatchFinder_putNewWord(block, word, this.options.blockSize);
if (key === null) {
continue;
}
if (this.wordIndices.has(key)) {
this.wordIndices.get(key).push(i);
} else {
this.wordIndices.set(key, [i]);
}
}
}
// Converts the word to index-friendly value so it can be compared with other similar words
normalizeForIndex(word) {
word = stripAnyAttributes(word);
if (this.options.IgnoreWhiteSpaceDifferences && isWhiteSpace(word)) {
return ' ';
}
return word;
}
findMatch() {
this.indexNewWords();
this.removeRepeatingWords();
if (this.wordIndices.length === 0) {
return null;
}
let bestMatchInOld = this.startInOld;
let bestMatchInNew = this.startInNew;
let bestMatchSize = 0;
let matchLengthAt = new Map();
const blockSize = this.options.blockSize;
let block = [];
for (let indexInOld = this.startInOld; indexInOld < this.endInOld; indexInOld++) {
let word = this.normalizeForIndex(this.oldWords[indexInOld]);
let index = MatchFinder_putNewWord(block, word, blockSize);
if (index === null) {
continue;
}
let newMatchLengthAt = new Map();
if (!this.wordIndices.has(index)) {
matchLengthAt = newMatchLengthAt;
continue;
}
for (let indexInNew of this.wordIndices.get(index)) {
let newMatchLength = (matchLengthAt.has(indexInNew - 1) ? matchLengthAt.get(indexInNew - 1) : 0) + 1;
newMatchLengthAt.set(indexInNew, newMatchLength);
if (newMatchLength > bestMatchSize) {
bestMatchInOld = indexInOld - newMatchLength - blockSize + 2;
bestMatchInNew = indexInNew - newMatchLength - blockSize + 2;
bestMatchSize = newMatchLength;
}
}
matchLengthAt = newMatchLengthAt;
}
return bestMatchSize !== 0 ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize + blockSize - 1) : null;
}
// This method removes words that occur too many times. This way it reduces total count of comparison operations
// and as result the diff algoritm takes less time. But the side effect is that it may detect false differences of
// the repeating words.
removeRepeatingWords() {
let threshold = this.newWords.length + this.options.repeatingWordsAccuracy;
let repeatingWords = Array.from(this.wordIndices.entries()).filter(i => i[1].length > threshold).map(i => i[0]);
for (let w of repeatingWords) {
this.wordIndices.delete(w);
}
}
}
// CONCATENATED MODULE: ./src/Operation.js
class Operation {
constructor(action, startInOld, endInOld, startInNew, endInNew) {
this.action = action;
this.startInOld = startInOld;
this.endInOld = endInOld;
this.startInNew = startInNew;
this.endInNew = endInNew;
}
}
// CONCATENATED MODULE: ./src/Mode.js
const Mode = {
character: 0,
tag: 1,
whitespace: 2,
entity: 3
};
/* harmony default export */ var src_Mode = (Mode);
// CONCATENATED MODULE: ./src/WordSplitter.js
function WordSplitter_convertHtmlToListOfWords(text, blockExpressions) {
let state = {
mode: src_Mode.character,
currentWord: [],
words: []
};
let blockLocations = WordSplitter_findBlocks(text, blockExpressions);
let isBlockCheckRequired = !!blockLocations.size;
let isGrouping = false;
let groupingUntil = -1;
for (let i = 0; i < text.length; i++) {
var character = text[i];
// Don't bother executing block checks if we don't have any blocks to check for!
if (isBlockCheckRequired) {
// Check if we have completed grouping a text sequence/block
if (groupingUntil === index) {
groupingUntil = -1;
isGrouping = false;
}
// Check if we need to group the next text sequence/block
let until = 0;
if (blockLocations.has(index)) {
until = blockLocations.get(index);
isGrouping = true;
groupingUntil = until;
}
// if we are grouping, then we don't care about what type of character we have, it's going to be treated as a word
if (isGrouping) {
state.currentWord.push(character);
state.mode = src_Mode.character;
continue;
}
}
switch (state.mode) {
case src_Mode.character:
if (isStartOfTag(character)) {
WordSplitter_addClearWordSwitchMode(state, '<', src_Mode.tag);
} else if (isStartOfEntity(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.entity);
} else if (isWhiteSpace(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.whitespace);
} else if (isWord(character) && (state.currentWord.length === 0 || isWord(state.currentWord[state.currentWord.length - 1]))) {
state.currentWord.push(character);
} else {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.character);
}
break;
case src_Mode.tag:
if (isEndOfTag(character)) {
state.currentWord.push(character);
state.words.push(state.currentWord.join(''));
state.currentWord = [];
state.mode = isWhiteSpace(character) ? src_Mode.whitespace : src_Mode.character;
} else {
state.currentWord.push(character);
}
break;
case src_Mode.whitespace:
if (isStartOfTag(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.tag);
} else if (isStartOfEntity(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.entity);
} else if (isWhiteSpace(character)) {
state.currentWord.push(character);
} else {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.character);
}
break;
case src_Mode.entity:
if (isStartOfTag(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.tag);
} else if (isWhiteSpace(character)) {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.whitespace);
} else if (isEndOfEntity(character)) {
let switchToNextMode = true;
if (state.currentWord.length !== 0) {
state.currentWord.push(character);
state.words.push(state.currentWord.join(''));
//join &nbsp; entity with last whitespace
if (state.words.length > 2 && isWhiteSpace(state.words[state.words.length - 2]) && isWhiteSpace(state.words[state.words.length - 1])) {
let w1 = state.words[state.words.length - 2];
let w2 = state.words[state.words.length - 1];
state.words.splice(state.words.length - 2, 2);
state.currentWord = [(w1 + w2).split()];
state.mode = src_Mode.whitespace;
switchToNextMode = false;
}
}
if (switchToNextMode) {
state.currentWord = [];
state.mode = src_Mode.character;
}
} else if (isWord(character)) {
state.currentWord.push(character);
} else {
WordSplitter_addClearWordSwitchMode(state, character, src_Mode.character);
}
break;
}
}
if (state.currentWord.length !== 0) {
state.words.push(state.currentWord.join(''));
}
return state.words;
}
function WordSplitter_addClearWordSwitchMode(state, character, mode) {
if (state.currentWord.length !== 0) {
state.words.push(state.currentWord.join(''));
}
state.currentWord = [character];
state.mode = mode;
}
function WordSplitter_findBlocks(text, blockExpressions) {
let blockLocations = new Map();
if (blockExpressions === null) {
return blockLocations;
}
for (let exp of blockExpressions) {
let m;
while ((m = exp.exec(text)) !== null) {
if (blockLocations.has(m.index)) {
throw new Error("One or more block expressions result in a text sequence that overlaps. Current expression: " + exp.toString());
}
blockLocations.set(m.index, m.index + m[0].length);
}
}
return blockLocations;
}
// CONCATENATED MODULE: ./src/Diff.js
// This value defines balance between speed and memory utilization. The higher it is the faster it works and more memory consumes.
const Diff_MatchGranuarityMaximum = 4;
const Diff_specialCaseClosingTags = new Map([['</strong>', 0], ['</em>', 0], ['</b>', 0], ['</i>', 0], ['</big>', 0], ['</small>', 0], ['</u>', 0], ['</sub>', 0], ['</strike>', 0], ['</s>', 0], ['</dfn>', 0]]);
const Diff_specialCaseOpeningTagRegex = /<((strong)|(b)|(i)|(dfn)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s))[\>\s]+/ig;
class Diff_HtmlDiff {
constructor(oldText, newText) {
this.content = [];
this.newText = newText;
this.oldText = oldText;
this.specialTagDiffStack = [];
this.newWords = [];
this.oldWords = [];
this.matchGranularity = 0;
this.blockExpressions = [];
this.repeatingWordsAccuracy = 1.0;
this.ignoreWhiteSpaceDifferences = false;
this.orphanMatchThreshold = 0.0;
}
build() {
if (this.oldText === this.newText) {
return this.newText;
}
this.splitInputsIntoWords();
this.matchGranularity = Math.min(Diff_MatchGranuarityMaximum, this.oldWords.length, this.newWords.length);
let operations = this.operations();
for (let item of operations) {
this.performOperation(item);
}
return this.content.join('');
}
addBlockExpression(exp) {
this.blockExpressions.push(exp);
}
splitInputsIntoWords() {
this.oldWords = WordSplitter_convertHtmlToListOfWords(this.oldText, this.blockExpressions);
//free memory, allow it for GC
this.oldText = null;
this.newWords = WordSplitter_convertHtmlToListOfWords(this.newText, this.blockExpressions);
//free memory, allow it for GC
this.newText = null;
}
performOperation(opp) {
switch (opp.action) {
case src_Action.equal:
this.processEqualOperation(opp);
break;
case src_Action.delete:
this.processDeleteOperation(opp, "diffdel");
break;
case src_Action.insert:
this.processInsertOperation(opp, "diffins");
break;
case src_Action.none:
break;
case src_Action.replace:
this.processReplaceOperation(opp);
break;
}
}
processReplaceOperation(opp) {
this.processDeleteOperation(opp, "diffmod");
this.processInsertOperation(opp, "diffmod");
}
processInsertOperation(opp, cssClass) {
let text = this.newWords.filter((s, pos) => pos >= opp.startInNew && pos < opp.endInNew);
this.insertTag("ins", cssClass, text);
}
processDeleteOperation(opp, cssClass) {
let text = this.oldWords.filter((s, pos) => pos >= opp.startInOld && pos < opp.endInOld);
this.insertTag("del", cssClass, text);
}
processEqualOperation(opp) {
let result = this.newWords.filter((s, pos) => pos >= opp.startInNew && pos < opp.endInNew);
this.content.push(result.join(''));
}
insertTag(tag, cssClass, words) {
while (words.length) {
let nonTags = this.extractConsecutiveWords(words, x => !isTag(x));
let specialCaseTagInjection = '';
let specialCaseTagInjectionIsbefore = false;
if (nonTags.length !== 0) {
let text = wrapText(nonTags.join(''), tag, cssClass);
this.content.push(text);
} else {
if (Diff_specialCaseOpeningTagRegex.test(words[0])) {
this.specialTagDiffStack.push(words[0]);
specialCaseTagInjection = '<ins class="mod">';
if (tag === 'del') {
words.shift();
while (words.length > 0 && Diff_specialCaseOpeningTagRegex.test(words[0])) {
words.shift();
}
}
} else if (Diff_specialCaseClosingTags.has(words[0])) {
let openingTag = this.specialTagDiffStack.length === 0 ? null : this.specialTagDiffStack.pop();
if (!(openingTag === null || openingTag !== words[0].replace(/\//g, ''))) {
specialCaseTagInjection = '</ins>';
specialCaseTagInjectionIsbefore = true;
}
if (tag === 'del') {
words.shift();
while (words.length > 0 && Diff_specialCaseClosingTags.has(words[0])) {
words.shift();
}
}
}
if (words.length === 0 && specialCaseTagInjection.length === 0) {
break;
}
if (specialCaseTagInjectionIsbefore) {
this.content.push(specialCaseTagInjection + this.extractConsecutiveWords(words, isTag).join(''));
} else {
this.content.push(this.extractConsecutiveWords(words, isTag).join('') + specialCaseTagInjection);
}
}
}
}
extractConsecutiveWords(words, condition) {
let indexOfFirstTag = null;
for (let i = 0; i < words.length; i++) {
let word = words[i];
if (i === 0 && word === ' ') {
words[i] = '&nbsp;';
}
if (!condition(word)) {
indexOfFirstTag = i;
break;
}
}
if (indexOfFirstTag !== null) {
let items = words.filter((s, pos) => pos >= 0 && pos < indexOfFirstTag);
if (indexOfFirstTag > 0) {
words.splice(0, indexOfFirstTag);
}
return items;
} else {
let items = words.filter((s, pos) => pos >= 0 && pos < words.length);
words.splice(0, words.length);
return items;
}
}
operations() {
let positionInOld = 0;
let positionInNew = 0;
let operations = [];
let matches = this.matchingBlocks();
matches.push(new Match(this.oldWords.length, this.newWords.length, 0));
let matchesWithoutOrphans = this.removeOrphans(matches);
for (let match of matchesWithoutOrphans) {
let matchStartsAtCurrentPositionInOld = positionInOld === match.startInOld;
let matchStartsAtCurrentPositionInNew = positionInNew === match.startInNew;
let action;
if (!matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {
action = src_Action.replace;
} else if (matchStartsAtCurrentPositionInOld && !matchStartsAtCurrentPositionInNew) {
action = src_Action.insert;
} else if (!matchStartsAtCurrentPositionInOld) {
action = src_Action.delete;
} else {
action = src_Action.none;
}
if (action !== src_Action.none) {
operations.push(new Operation(action, positionInOld, match.startInOld, positionInNew, match.startInNew));
}
if (match.length !== 0) {
operations.push(new Operation(src_Action.equal, match.startInOld, match.endInOld, match.startInNew, match.endInNew));
}
positionInOld = match.endInOld;
positionInNew = match.endInNew;
}
return operations;
}
*removeOrphans(matches) {
let prev = null;
let curr = null;
for (let next of matches) {
if (curr === null) {
prev = new Match(0, 0, 0);
curr = next;
continue;
}
if (prev.endInOld === curr.startInOld && prev.endInNew === curr.startInNew || curr.endInOld === next.startInOld && curr.endInNew === next.startInNew) {
yield curr;
let tmp = prev = curr; // "let tmp" avoids babel traspiling error
curr = next;
continue;
}
let sumLength = (t, n) => t + n.length;
let oldDistanceInChars = this.oldWords.slice(prev.endInOld, next.startInOld).reduce(sumLength, 0);
let newDistanceInChars = this.newWords.slice(prev.endInNew, next.startInNew).reduce(sumLength, 0);
let currMatchLengthInChars = this.newWords.slice(curr.startInNew, curr.endInNew).reduce(sumLength, 0);
if (currMatchLengthInChars > Math.max(oldDistanceInChars, newDistanceInChars) * this.orphanMatchThreshold) {
yield curr;
}
prev = curr;
curr = next;
}
yield curr;
}
matchingBlocks() {
let matchingBlocks = [];
this.findMatchingBlocks(0, this.oldWords.length, 0, this.newWords.length, matchingBlocks);
return matchingBlocks;
}
findMatchingBlocks(startInOld, endInOld, startInNew, endInNew, matchingBlocks) {
let match = this.findMatch(startInOld, endInOld, startInNew, endInNew);
if (match !== null) {
if (startInOld < match.startInOld && startInNew < match.startInNew) {
this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks);
}
matchingBlocks.push(match);
if (match.endInOld < endInOld && match.endInNew < endInNew) {
this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks);
}
}
}
findMatch(startInOld, endInOld, startInNew, endInNew) {
for (let i = this.matchGranularity; i > 0; i--) {
let options = new MatchOptions();
options.blockSize = i;
options.repeatingWordsAccuracy = this.repeatingWordsAccuracy;
options.ignoreWhitespaceDifferences = this.ignoreWhiteSpaceDifferences;
let finder = new MatchFinder_MatchFinder(this.oldWords, this.newWords, startInOld, endInOld, startInNew, endInNew, options);
let match = finder.findMatch();
if (match !== null) {
return match;
}
}
return null;
}
}
Diff_HtmlDiff.execute = function (oldText, newText) {
return new Diff_HtmlDiff(oldText, newText).build();
};
/* harmony default export */ var Diff = __webpack_exports__["default"] = (Diff_HtmlDiff);
/***/ }),
/* 1 */
/***/ (function(module, exports, __webpack_require__) {
module.exports = __webpack_require__(0);
/***/ })
/******/ ]);
});
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t():"function"==typeof define&&define.amd?define([],t):"object"==typeof exports?exports.HtmlDiff=t():e.HtmlDiff=t()}(window,function(){return function(e){var t={};function n(s){if(t[s])return t[s].exports;var r=t[s]={i:s,l:!1,exports:{}};return e[s].call(r.exports,r,r.exports,n),r.l=!0,r.exports}return n.m=e,n.c=t,n.d=function(e,t,s){n.o(e,t)||Object.defineProperty(e,t,{configurable:!1,enumerable:!0,get:s})},n.r=function(e){Object.defineProperty(e,"__esModule",{value:!0})},n.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return n.d(t,"a",t),t},n.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},n.p="/dist/",n(n.s=1)}([function(e,t,n){"use strict";n.r(t);var s={equal:0,delete:1,insert:2,none:3,replace:4};class r{constructor(e,t,n){this.startInOld=e,this.startInNew=t,this.size=n}get endInOld(){return this.startInOld+this.size}get endInNew(){return this.startInNew+this.size}}class i{constructor(){this.blockSize=0,this.repeatingWordsAccuracy=0,this.ignoreWhitespaceDifferences=!1}}const o=/^\s*<\/?[^>]+>\s*$/,l=/<[^\s>]+/,h=/^(\s|&nbsp;)+$/,c=/[\w\#@]+/,d=["<img"];function a(e){return!d.some(t=>null!==e&&e.startsWith(t))&&o.test(e)}function u(e,t,n){return["<",t,' class="',n,'">',e,"</",t,">"].join("")}function f(e){return"<"===e}function p(e){return"&"===e}function w(e){return";"===e}function g(e){return h.test(e)}function I(e){return a(e)?function(e){return e=l.exec(e)[0]+(e.endsWith("/>")?"/>":">")}(e):e}function W(e){return c.test(e)}function x(e,t,n){return e.push(t),e.length>n&&e.shift(),e.length!==n?null:e.join("")}class O{constructor(e,t,n,s,r,i,o){this.oldWords=e,this.newWords=t,this.startInOld=n,this.endInOld=s,this.startInNew=r,this.endInNew=i,this.options=o}indexNewWords(){this.wordIndices=new Map;let e=[];for(let t=this.startInNew;t<this.endInNew;t++){let n=x(e,this.normalizeForIndex(this.newWords[t]),this.options.blockSize);null!==n&&(this.wordIndices.has(n)?this.wordIndices.get(n).push(t):this.wordIndices.set(n,[t]))}}normalizeForIndex(e){return e=I(e),this.options.IgnoreWhiteSpaceDifferences&&g(e)?" ":e}findMatch(){if(this.indexNewWords(),this.removeRepeatingWords(),0===this.wordIndices.length)return null;let e=this.startInOld,t=this.startInNew,n=0,s=new Map;const i=this.options.blockSize;let o=[];for(let r=this.startInOld;r<this.endInOld;r++){let l=x(o,this.normalizeForIndex(this.oldWords[r]),i);if(null===l)continue;let h=new Map;if(this.wordIndices.has(l)){for(let o of this.wordIndices.get(l)){let l=(s.has(o-1)?s.get(o-1):0)+1;h.set(o,l),l>n&&(e=r-l-i+2,t=o-l-i+2,n=l)}s=h}else s=h}return 0!==n?new r(e,t,n+i-1):null}removeRepeatingWords(){let e=this.newWords.length+this.options.repeatingWordsAccuracy,t=Array.from(this.wordIndices.entries()).filter(t=>t[1].length>e).map(e=>e[0]);for(let e of t)this.wordIndices.delete(e)}}class m{constructor(e,t,n,s,r){this.action=e,this.startInOld=t,this.endInOld=n,this.startInNew=s,this.endInNew=r}}var b={character:0,tag:1,whitespace:2,entity:3};function N(e,t){let n={mode:b.character,currentWord:[],words:[]},s=function(e,t){let n=new Map;if(null===t)return n;for(let s of t){let t;for(;null!==(t=s.exec(e));){if(n.has(t.index))throw new Error("One or more block expressions result in a text sequence that overlaps. Current expression: "+s.toString());n.set(t.index,t.index+t[0].length)}}return n}(e,t),r=!!s.size,i=!1,o=-1;for(let t=0;t<e.length;t++){var l=e[t];if(r){o===index&&(o=-1,i=!1);let e=0;if(s.has(index)&&(i=!0,o=e=s.get(index)),i){n.currentWord.push(l),n.mode=b.character;continue}}switch(n.mode){case b.character:f(l)?k(n,"<",b.tag):p(l)?k(n,l,b.entity):g(l)?k(n,l,b.whitespace):W(l)&&(0===n.currentWord.length||W(n.currentWord[n.currentWord.length-1]))?n.currentWord.push(l):k(n,l,b.character);break;case b.tag:">"===l?(n.currentWord.push(l),n.words.push(n.currentWord.join("")),n.currentWord=[],n.mode=g(l)?b.whitespace:b.character):n.currentWord.push(l);break;case b.whitespace:f(l)?k(n,l,b.tag):p(l)?k(n,l,b.entity):g(l)?n.currentWord.push(l):k(n,l,b.character);break;case b.entity:if(f(l))k(n,l,b.tag);else if(g(l))k(n,l,b.whitespace);else if(w(l)){let e=!0;if(0!==n.currentWord.length&&(n.currentWord.push(l),n.words.push(n.currentWord.join("")),n.words.length>2&&g(n.words[n.words.length-2])&&g(n.words[n.words.length-1]))){let t=n.words[n.words.length-2],s=n.words[n.words.length-1];n.words.splice(n.words.length-2,2),n.currentWord=[(t+s).split()],n.mode=b.whitespace,e=!1}e&&(n.currentWord=[],n.mode=b.character)}else W(l)?n.currentWord.push(l):k(n,l,b.character)}}return 0!==n.currentWord.length&&n.words.push(n.currentWord.join("")),n.words}function k(e,t,n){0!==e.currentWord.length&&e.words.push(e.currentWord.join("")),e.currentWord=[t],e.mode=n}const y=4,M=new Map([["</strong>",0],["</em>",0],["</b>",0],["</i>",0],["</big>",0],["</small>",0],["</u>",0],["</sub>",0],["</strike>",0],["</s>",0],["</dfn>",0]]),T=/<((strong)|(b)|(i)|(dfn)|(em)|(big)|(small)|(u)|(sub)|(sup)|(strike)|(s))[\>\s]+/gi;class j{constructor(e,t){this.content=[],this.newText=t,this.oldText=e,this.specialTagDiffStack=[],this.newWords=[],this.oldWords=[],this.matchGranularity=0,this.blockExpressions=[],this.repeatingWordsAccuracy=1,this.ignoreWhiteSpaceDifferences=!1,this.orphanMatchThreshold=0}build(){if(this.oldText===this.newText)return this.newText;this.splitInputsIntoWords(),this.matchGranularity=Math.min(y,this.oldWords.length,this.newWords.length);let e=this.operations();for(let t of e)this.performOperation(t);return this.content.join("")}addBlockExpression(e){this.blockExpressions.push(e)}splitInputsIntoWords(){this.oldWords=N(this.oldText,this.blockExpressions),this.oldText=null,this.newWords=N(this.newText,this.blockExpressions),this.newText=null}performOperation(e){switch(e.action){case s.equal:this.processEqualOperation(e);break;case s.delete:this.processDeleteOperation(e,"diffdel");break;case s.insert:this.processInsertOperation(e,"diffins");break;case s.none:break;case s.replace:this.processReplaceOperation(e)}}processReplaceOperation(e){this.processDeleteOperation(e,"diffmod"),this.processInsertOperation(e,"diffmod")}processInsertOperation(e,t){let n=this.newWords.filter((t,n)=>n>=e.startInNew&&n<e.endInNew);this.insertTag("ins",t,n)}processDeleteOperation(e,t){let n=this.oldWords.filter((t,n)=>n>=e.startInOld&&n<e.endInOld);this.insertTag("del",t,n)}processEqualOperation(e){let t=this.newWords.filter((t,n)=>n>=e.startInNew&&n<e.endInNew);this.content.push(t.join(""))}insertTag(e,t,n){for(;n.length;){let s=this.extractConsecutiveWords(n,e=>!a(e)),r="",i=!1;if(0!==s.length){let n=u(s.join(""),e,t);this.content.push(n)}else{if(T.test(n[0])){if(this.specialTagDiffStack.push(n[0]),r='<ins class="mod">',"del"===e)for(n.shift();n.length>0&&T.test(n[0]);)n.shift()}else if(M.has(n[0])){let t=0===this.specialTagDiffStack.length?null:this.specialTagDiffStack.pop();if(null!==t&&t===n[0].replace(/\//g,"")&&(r="</ins>",i=!0),"del"===e)for(n.shift();n.length>0&&M.has(n[0]);)n.shift()}if(0===n.length&&0===r.length)break;i?this.content.push(r+this.extractConsecutiveWords(n,a).join("")):this.content.push(this.extractConsecutiveWords(n,a).join("")+r)}}}extractConsecutiveWords(e,t){let n=null;for(let s=0;s<e.length;s++){let r=e[s];if(0===s&&" "===r&&(e[s]="&nbsp;"),!t(r)){n=s;break}}if(null!==n){let t=e.filter((e,t)=>t>=0&&t<n);return n>0&&e.splice(0,n),t}{let t=e.filter((t,n)=>n>=0&&n<e.length);return e.splice(0,e.length),t}}operations(){let e=0,t=0,n=[],i=this.matchingBlocks();i.push(new r(this.oldWords.length,this.newWords.length,0));let o=this.removeOrphans(i);for(let r of o){let i,o=e===r.startInOld,l=t===r.startInNew;(i=o||l?o&&!l?s.insert:o?s.none:s.delete:s.replace)!==s.none&&n.push(new m(i,e,r.startInOld,t,r.startInNew)),0!==r.length&&n.push(new m(s.equal,r.startInOld,r.endInOld,r.startInNew,r.endInNew)),e=r.endInOld,t=r.endInNew}return n}*removeOrphans(e){let t=null,n=null;for(let s of e){if(null===n){t=new r(0,0,0),n=s;continue}if(t.endInOld===n.startInOld&&t.endInNew===n.startInNew||n.endInOld===s.startInOld&&n.endInNew===s.startInNew){yield n;t=n;n=s;continue}let e=(e,t)=>e+t.length,i=this.oldWords.slice(t.endInOld,s.startInOld).reduce(e,0),o=this.newWords.slice(t.endInNew,s.startInNew).reduce(e,0);this.newWords.slice(n.startInNew,n.endInNew).reduce(e,0)>Math.max(i,o)*this.orphanMatchThreshold&&(yield n),t=n,n=s}yield n}matchingBlocks(){let e=[];return this.findMatchingBlocks(0,this.oldWords.length,0,this.newWords.length,e),e}findMatchingBlocks(e,t,n,s,r){let i=this.findMatch(e,t,n,s);null!==i&&(e<i.startInOld&&n<i.startInNew&&this.findMatchingBlocks(e,i.startInOld,n,i.startInNew,r),r.push(i),i.endInOld<t&&i.endInNew<s&&this.findMatchingBlocks(i.endInOld,t,i.endInNew,s,r))}findMatch(e,t,n,s){for(let r=this.matchGranularity;r>0;r--){let o=new i;o.blockSize=r,o.repeatingWordsAccuracy=this.repeatingWordsAccuracy,o.ignoreWhitespaceDifferences=this.ignoreWhiteSpaceDifferences;let l=new O(this.oldWords,this.newWords,e,t,n,s,o).findMatch();if(null!==l)return l}return null}}j.execute=function(e,t){return new j(e,t).build()};t.default=j},function(e,t,n){e.exports=n(0)}])});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment