Skip to content

Instantly share code, notes, and snippets.

@Calmarius
Last active July 24, 2022 04:09
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Calmarius/24bdf9b753eb0553c148 to your computer and use it in GitHub Desktop.
Save Calmarius/24bdf9b753eb0553c148 to your computer and use it in GitHub Desktop.
PHP diff algorithm with minimum gap removal
/**
* Removes gaps between the diff block by fusing the lines into the diff.
*
* $diff: A diff created by computeDiff.
* $minGaps: a number specifying the smallest allowable gap between two diff blocks.
*
* Returns a new diff (see computeDiff for details.)
*/
function removeDiffGaps($diff, $minGaps)
{
if (!isset($diff['values'])) return FALSE;
if (!isset($diff['mask'])) return FALSE;
$values = $diff['values'];
$mask = $diff['mask'];
$n = count($mask);
$newValues = array();
$newMask = array();
$inUnchangedBlock = FALSE;
$unchangedLength = 0;
$unchangedStart = -1;
$pushValues = function($start, $end, $values, $mask) use (&$newValues, &$newMask)
{
for ($j = $start; $j < $end; $j++)
{
$newValues[] = $values[$j];
$newMask[] = $mask;
}
};
// Mark short unchanged blocks with '-' and '+' as if they are removed or added at the same time.
for ($i = 0; $i < $n; $i++)
{
$maskval = $mask[$i];
$value = $values[$i];
if ($maskval == 0)
{
if (!$inUnchangedBlock)
{
$unchangedLength = 0;
$unchangedStart = $i;
$inUnchangedBlock = TRUE;
}
$unchangedLength++;
}
else
{
if ($inUnchangedBlock)
{
if ($unchangedLength < $minGaps)
{
for ($j = $unchangedStart; $j < $i; $j++)
{
$newValues[] = $values[$j];
$newMask[] = -1;
$newValues[] = $values[$j];
$newMask[] = 1;
}
}
else
{
$pushValues($unchangedStart, $i, $values, 0);
}
$inUnchangedBlock = FALSE;
}
$newValues[] = $value;
$newMask[] = $maskval;
}
}
if ($inUnchangedBlock)
{
$pushValues($unchangedStart, $i, $values, 0);
}
// Now sort the diff blocks so '-'s precede the '+'s,
$values = $newValues;
$mask = $newMask;
$n = count($mask);
$newValues = array();
$newMask = array();
$inUnchangedBlock = FALSE;
$minuses = array();
$pluses = array();
$pushMinusPlus = function($minuses, $pluses) use ($pushValues)
{
$m = count($minuses);
$pushValues(0, $m, $minuses, -1);
$m = count($pluses);
$pushValues(0, $m, $pluses, 1);
};
for ($i = 0; $i < $n; $i++)
{
$maskval = $mask[$i];
$value = $values[$i];
if ($maskval == 0)
{
if (!$inUnchangedBlock)
{
$pushMinusPlus($minuses, $pluses);
$inUnchangedBlock = TRUE;
}
$newValues[] = $value;
$newMask[] = $maskval;
}
else
{
if ($inUnchangedBlock)
{
$inUnchangedBlock = FALSE;
$minuses = array();
$pluses = array();
}
if ($maskval == 1)
{
$pluses[] = $value;
}
else
{
$minuses[] = $value;
}
}
}
if (!$inUnchangedBlock)
{
$pushMinusPlus($minuses, $pluses);
}
return array('values' => $newValues, 'mask' => $newMask);
}
/**
* Computes the diff of two arrays.
*
* $from, $to: The two arrays to compare.
* $lcsOnly: Set true if you are only interested in the longest common subsequence and down want the - and +-s.
*
* Returns an associative array containing two fields:
*
* values: Union of the two arrays containing all the common and different elements.
* mask: Contains numbers: 0: unchanged, -1: removed, 1: added.
*
* mask[i] corresponds to values[i]
*
*/
function computeDiff($from, $to, $lcsOnly = FALSE)
{
$diffValues = array();
$diffMask = array();
$dm = array();
$n1 = count($from);
$n2 = count($to);
$n = min($n1, $n2);
// Extract unchanged head and remove unchanged elements from both arrays.
$headVal = array();
$headMask = array();
$firstChanged = $n;
for ($i = 0; $i < $n; $i++)
{
if ($from[$i] != $to[$i])
{
$firstChanged = $i;
break;
}
$headVal[] = $from[$i];
$headMask[] = 0;
}
$from = array_slice($from, $firstChanged);
$to = array_slice($to, $firstChanged);
$n1 -= $firstChanged;
$n2 -= $firstChanged;
$n -= $firstChanged;
// Extract unchanged tail and remove unchanged elements from both arrays.
$tailVal = array();
$tailMask = array();
$lastChanged = $n;
for ($i = 1; $i <= $n; $i++)
{
if ($from[$n1 - $i] != $to[$n2 - $i])
{
$lastChanged = $i - 1;
break;
}
$tailVal[] = $from[$n1 - $i];
$tailMask[] = 0;
}
$tailVal = array_reverse($tailVal);
$from = array_slice($from, 0, $n1 - $lastChanged);
$to = array_slice($to, 0, $n2 - $lastChanged);
$n1 -= $lastChanged;
$n2 -= $lastChanged;
$n -= $lastChanged;
// Generate LCS matrix. First is the column, second is the row.
for ($j = -1; $j < $n2; $j++) $dm[-1][$j] = 0;
for ($i = -1; $i < $n1; $i++) $dm[$i][-1] = 0;
ksort($dm[-1]);
ksort($dm);
for ($i = 0; $i < $n1; $i++)
{
for ($j = 0; $j < $n2; $j++)
{
if ($from[$i] == $to[$j])
{
$ad = $dm[$i - 1][$j - 1];
$dm[$i][$j] = $ad + 1;
}
else
{
$a1 = $dm[$i - 1][$j];
$a2 = $dm[$i][$j - 1];
$dm[$i][$j] = max($a1, $a2);
}
}
}
// Traverse the diff matrix.
$i = $n1 - 1;
$j = $n2 - 1;
while (($i > -1) || ($j > -1))
{
if ($j > -1)
{
if ($dm[$i][$j - 1] == $dm[$i][$j])
{
if (!$lcsOnly)
{
$diffValues[] = $to[$j];
$diffMask[] = 1;
}
$j--;
continue;
}
}
if ($i > -1)
{
if ($dm[$i - 1][$j] == $dm[$i][$j])
{
if (!$lcsOnly)
{
$diffValues[] = $from[$i];
$diffMask[] = -1;
}
$i--;
continue;
}
}
{
$diffValues[] = $from[$i];
$diffMask[] = 0;
$i--;
$j--;
}
}
$diffValues = array_reverse($diffValues);
$diffMask = array_reverse($diffMask);
$diffValues = array_merge($headVal, $diffValues, $tailVal);
$diffMask = array_merge($headMask, $diffMask, $tailMask);
if ($lcsOnly) return $diffValues;
return array('values' => $diffValues, 'mask' => $diffMask);
}
@gbutiri
Copy link

gbutiri commented Jan 9, 2019

Great tool! Thank you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment