Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
PHP diff algorithm with minimum gap removal
/**
* Removes gaps between the diff block by fusing the lines into the diff.
*
* $diff: A diff created by computeDiff.
* $minGaps: a number specifying the smallest allowable gap between two diff blocks.
*
* Returns a new diff (see computeDiff for details.)
*/
function removeDiffGaps($diff, $minGaps)
{
if (!isset($diff['values'])) return FALSE;
if (!isset($diff['mask'])) return FALSE;
$values = $diff['values'];
$mask = $diff['mask'];
$n = count($mask);
$newValues = array();
$newMask = array();
$inUnchangedBlock = FALSE;
$unchangedLength = 0;
$unchangedStart = -1;
$pushValues = function($start, $end, $values, $mask) use (&$newValues, &$newMask)
{
for ($j = $start; $j < $end; $j++)
{
$newValues[] = $values[$j];
$newMask[] = $mask;
}
};
// Mark short unchanged blocks with '-' and '+' as if they are removed or added at the same time.
for ($i = 0; $i < $n; $i++)
{
$maskval = $mask[$i];
$value = $values[$i];
if ($maskval == 0)
{
if (!$inUnchangedBlock)
{
$unchangedLength = 0;
$unchangedStart = $i;
$inUnchangedBlock = TRUE;
}
$unchangedLength++;
}
else
{
if ($inUnchangedBlock)
{
if ($unchangedLength < $minGaps)
{
for ($j = $unchangedStart; $j < $i; $j++)
{
$newValues[] = $values[$j];
$newMask[] = -1;
$newValues[] = $values[$j];
$newMask[] = 1;
}
}
else
{
$pushValues($unchangedStart, $i, $values, 0);
}
$inUnchangedBlock = FALSE;
}
$newValues[] = $value;
$newMask[] = $maskval;
}
}
if ($inUnchangedBlock)
{
$pushValues($unchangedStart, $i, $values, 0);
}
// Now sort the diff blocks so '-'s precede the '+'s,
$values = $newValues;
$mask = $newMask;
$n = count($mask);
$newValues = array();
$newMask = array();
$inUnchangedBlock = FALSE;
$minuses = array();
$pluses = array();
$pushMinusPlus = function($minuses, $pluses) use ($pushValues)
{
$m = count($minuses);
$pushValues(0, $m, $minuses, -1);
$m = count($pluses);
$pushValues(0, $m, $pluses, 1);
};
for ($i = 0; $i < $n; $i++)
{
$maskval = $mask[$i];
$value = $values[$i];
if ($maskval == 0)
{
if (!$inUnchangedBlock)
{
$pushMinusPlus($minuses, $pluses);
$inUnchangedBlock = TRUE;
}
$newValues[] = $value;
$newMask[] = $maskval;
}
else
{
if ($inUnchangedBlock)
{
$inUnchangedBlock = FALSE;
$minuses = array();
$pluses = array();
}
if ($maskval == 1)
{
$pluses[] = $value;
}
else
{
$minuses[] = $value;
}
}
}
if (!$inUnchangedBlock)
{
$pushMinusPlus($minuses, $pluses);
}
return array('values' => $newValues, 'mask' => $newMask);
}
/**
* Computes the diff of two arrays.
*
* $from, $to: The two arrays to compare.
* $lcsOnly: Set true if you are only interested in the longest common subsequence and down want the - and +-s.
*
* Returns an associative array containing two fields:
*
* values: Union of the two arrays containing all the common and different elements.
* mask: Contains numbers: 0: unchanged, -1: removed, 1: added.
*
* mask[i] corresponds to values[i]
*
*/
function computeDiff($from, $to, $lcsOnly = FALSE)
{
$diffValues = array();
$diffMask = array();
$dm = array();
$n1 = count($from);
$n2 = count($to);
$n = min($n1, $n2);
// Extract unchanged head and remove unchanged elements from both arrays.
$headVal = array();
$headMask = array();
$firstChanged = $n;
for ($i = 0; $i < $n; $i++)
{
if ($from[$i] != $to[$i])
{
$firstChanged = $i;
break;
}
$headVal[] = $from[$i];
$headMask[] = 0;
}
$from = array_slice($from, $firstChanged);
$to = array_slice($to, $firstChanged);
$n1 -= $firstChanged;
$n2 -= $firstChanged;
$n -= $firstChanged;
// Extract unchanged tail and remove unchanged elements from both arrays.
$tailVal = array();
$tailMask = array();
$lastChanged = $n;
for ($i = 1; $i <= $n; $i++)
{
if ($from[$n1 - $i] != $to[$n2 - $i])
{
$lastChanged = $i - 1;
break;
}
$tailVal[] = $from[$n1 - $i];
$tailMask[] = 0;
}
$tailVal = array_reverse($tailVal);
$from = array_slice($from, 0, $n1 - $lastChanged);
$to = array_slice($to, 0, $n2 - $lastChanged);
$n1 -= $lastChanged;
$n2 -= $lastChanged;
$n -= $lastChanged;
// Generate LCS matrix. First is the column, second is the row.
for ($j = -1; $j < $n2; $j++) $dm[-1][$j] = 0;
for ($i = -1; $i < $n1; $i++) $dm[$i][-1] = 0;
ksort($dm[-1]);
ksort($dm);
for ($i = 0; $i < $n1; $i++)
{
for ($j = 0; $j < $n2; $j++)
{
if ($from[$i] == $to[$j])
{
$ad = $dm[$i - 1][$j - 1];
$dm[$i][$j] = $ad + 1;
}
else
{
$a1 = $dm[$i - 1][$j];
$a2 = $dm[$i][$j - 1];
$dm[$i][$j] = max($a1, $a2);
}
}
}
// Traverse the diff matrix.
$i = $n1 - 1;
$j = $n2 - 1;
while (($i > -1) || ($j > -1))
{
if ($j > -1)
{
if ($dm[$i][$j - 1] == $dm[$i][$j])
{
if (!$lcsOnly)
{
$diffValues[] = $to[$j];
$diffMask[] = 1;
}
$j--;
continue;
}
}
if ($i > -1)
{
if ($dm[$i - 1][$j] == $dm[$i][$j])
{
if (!$lcsOnly)
{
$diffValues[] = $from[$i];
$diffMask[] = -1;
}
$i--;
continue;
}
}
{
$diffValues[] = $from[$i];
$diffMask[] = 0;
$i--;
$j--;
}
}
$diffValues = array_reverse($diffValues);
$diffMask = array_reverse($diffMask);
$diffValues = array_merge($headVal, $diffValues, $tailVal);
$diffMask = array_merge($headMask, $diffMask, $tailMask);
if ($lcsOnly) return $diffValues;
return array('values' => $diffValues, 'mask' => $diffMask);
}
@gbutiri

This comment has been minimized.

Copy link

gbutiri commented Jan 9, 2019

Great tool! Thank you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.