Skip to content

Instantly share code, notes, and snippets.

@creepone
Last active December 14, 2015 16:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save creepone/5119459 to your computer and use it in GitHub Desktop.
Save creepone/5119459 to your computer and use it in GitHub Desktop.
Diff adapted for conservative merge
// Copyright (c) 2006, 2008 Tony Garnock-Jones <tonyg@lshift.net>
// Copyright (c) 2006, 2008 LShift Ltd. <query@lshift.net>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation files
// (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software,
// and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
(function() {
function longest_common_subsequence(file1, file2)
{
/* Text diff algorithm following Hunt and McIlroy 1976.
* J. W. Hunt and M. D. McIlroy, An algorithm for differential file
* comparison, Bell Telephone Laboratories CSTR #41 (1976)
* http://www.cs.dartmouth.edu/~doug/
*
* Expects two arrays of strings.
*/
var equivalenceClasses;
var file2indices;
var newCandidate;
var candidates;
var line;
var c, i, j, jX, r, s;
equivalenceClasses = {};
for (j = 0; j < file2.length; j++) {
line = file2[j];
if (equivalenceClasses[line]) {
equivalenceClasses[line].push(j);
} else {
equivalenceClasses[line] = [j];
}
}
candidates = [{file1index: -1,
file2index: -1,
chain: null}];
for (i = 0; i < file1.length; i++) {
line = file1[i];
file2indices = equivalenceClasses[line] || [];
r = 0;
c = candidates[0];
for (jX = 0; jX < file2indices.length; jX++) {
j = file2indices[jX];
for (s = r; s < candidates.length; s++) {
if ((candidates[s].file2index < j) &&
((s == candidates.length - 1) ||
(candidates[s + 1].file2index > j)))
break;
}
if (s < candidates.length) {
newCandidate = {file1index: i,
file2index: j,
chain: candidates[s]};
if (r == candidates.length) {
candidates.push(c);
} else {
candidates[r] = c;
}
r = s + 1;
c = newCandidate;
if (r == candidates.length) {
break; // no point in examining further (j)s
}
}
}
candidates[r] = c;
}
// At this point, we know the LCS: it's in the reverse of the
// linked-list through .chain of
// candidates[candidates.length - 1].
return candidates[candidates.length - 1];
}
function diff_comm(file1, file2)
{
// We apply the LCS to build a "comm"-style picture of the
// differences between file1 and file2.
var result = [];
var tail1 = file1.length;
var tail2 = file2.length;
var common = {common: []};
function processCommon() {
if (common.common.length) {
common.common.reverse();
result.push(common);
common = {common: []};
}
}
for (var candidate = longest_common_subsequence(file1, file2);
candidate !== null;
candidate = candidate.chain)
{
var different = {file1: [], file2: []};
while (--tail1 > candidate.file1index) {
different.file1.push(file1[tail1]);
}
while (--tail2 > candidate.file2index) {
different.file2.push(file2[tail2]);
}
if (different.file1.length || different.file2.length) {
processCommon();
different.file1.reverse();
different.file2.reverse();
result.push(different);
}
if (tail1 >= 0) {
common.common.push(file1[tail1]);
}
}
processCommon();
result.reverse();
return result;
}
function preprocess(file)
{
// splits the contents of the file1 to words and non-trivial sequences of white space (incl. line breaks)
var res = [],
words = file.split(/\s+/),
whitespaces = file.match(/\s+/g) || [];
for (var i = 0; i < words.length || i < whitespaces.length; i++) {
var word = words[i], ws = whitespaces[i];
if (word)
res.push(word);
if (ws && ws !== " ")
res.push(ws);
}
return res;
}
window.merge = function(file1, file2)
{
// we use the diff_comm result to perform an automatic conservative (lossless) merge
var diff = diff_comm(preprocess(file1), preprocess(file2));
var resArr = [];
for (var i = 0; i < diff.length; i++) {
var o = diff[i];
if (o.common) {
[].push.apply(resArr, o.common);
}
else {
[].push.apply(resArr, o.file1);
[].push.apply(resArr, o.file2);
}
}
var res = "";
for (i = 0; i < resArr.length; i++)
{
var token = resArr[i];
res += token;
if (!token.match(/\s+/) && i < resArr.length - 1)
res += " ";
}
return res;
}
}());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment