Skip to content

Instantly share code, notes, and snippets.

@homeyjd
Last active December 20, 2015 21:18
Show Gist options
  • Save homeyjd/6196699 to your computer and use it in GitHub Desktop.
Save homeyjd/6196699 to your computer and use it in GitHub Desktop.
Compute the synchronization steps between two key-value pairs. Consider: * no key can conflict. That is, if 4 is deleted, and 5 is moved to 4, all currently-tagged 4's must be moved OUT before 5 can be moved, else you get key conflicts. * delete operations should be reduced to default, but since default might be equal to a changing ID, the delet…
<?php
/**
* Compute the tasks that must occur to convert a key-value table specified by $src
* into a new key-value $dest where the VALUES are the important tags and the IDs
* are simply placeholders.
*
* This is helpful with user-provided data where keys are stored in the database
* (especially as integers) but user-provided values (labels for records, etc.)
* are the actual identifiers for the data.
*
* The resulting array will contain data with possibly integer or string type. All
* incoming keys are normalized to strings, but upper-limit moves are used with
* un-casted integers, and so will come out as numbers. If you are sure your move
* operations will always be integers, then forcibly cast the results of this
* function to (int) before creating your queries.
*
* @param array $src Key-value source
* @param array $dest Key-value destination
* @return array An array of tasks that will translate stored data to the new schema
* @author Jesse Decker
* @version 0.3
*/
$computeDiffOfKeyByValue = function (array $src, array $dest, $default = null) {
$moveTasks = array();
$valueMap = array();
$srcKeys = array();
// 1) Compare source and destination
foreach ( $src as $key => $value ) {
$key = '' . $key;
// If the value will exist when done (returns first match)
// This isn't the fastest, but ensures dealing with possible duplicate values
if (($index = array_search($value, $dest, true)) !== false) {
// If it's changing IDs
$index = '' . $index;
if ($index !== $key) {
$valueMap[] = array (
'src' => $key,
'dest' => $index
);
$srcKeys[$key] = $index;
} else {
$srcKeys[$key] = $key;
}
}
// Else it will be deleted
else {
$valueMap[] = array ('src' => $key, 'dest' => $default, 'delete' => true);
$srcKeys[$key] = $default;
}
}
if ( count($valueMap) ) {
// If default is a non-conflict key, then remove the delete flag so we can optimize
if (!isset($srcKeys[$default])) {
foreach ( $valueMap as &$vals ) {
if (isset($vals['delete'])) {
unset( $vals['delete'] );
}
}
}
$performNonConflictMoves = function ($moveDeletes = false) use (&$moveTasks, &$valueMap, &$srcKeys) {
// For all changing values, if there is no conflict, move the key
// Deletes could potentially have key collisions because the default might
// be a pre-existing key. So we remove the collision check for delete operations.
$foundOne = false;
do {
$found = false;
foreach ( $valueMap as $i => $vals ) {
if ($moveDeletes or (!isset($srcKeys[$vals['dest']]) and !isset($vals['delete']))) {
$found = true;
$foundOne = true;
$moveTasks[] = array ($vals['src'], $vals['dest']);
unset( $srcKeys[$vals ['src']] );
$srcKeys[$vals['dest']] = null;
unset( $valueMap[$i] );
}
}
} while ($found);
return $foundOne;
};
// 2) Perform non-conflict moves first
$performNonConflictMoves();
}
// If we still have tasks to do
if ( count($valueMap) ) {
// we need to find an unused key. because it might be a string or an integer,
// we use a number that won't match the string or integer versions.
$limit = 99; // signed tinyint only hits 124, so can't start too high
$getNextLimit = function () use (&$limit, &$srcKeys) {
// this is a horrible loop, but I must find the next non-conflict index
while ( true ) {
$limit ++;
if (! isset( $srcKeys[(string) $limit] )) {
return $limit;
}
}
};
// 3) For all remaining tasks, find a task that is in conflict, and move it to a non-conflict ID
do {
// foreach will generate an iterator which will copy-on-write
// we don't want that
$movedOne = false;
$found = false;
foreach ( $valueMap as $index => $vals ) {
// find out if changing this value's src will make a difference
foreach ( $valueMap as $dest ) {
if ($dest['dest'] === $vals['src']) {
$found = true;
break 2;
}
}
}
// if we found a conflict
if ($found) {
$nextLimit = $getNextLimit();
// move the current src to an unused ID
$moveTasks[] = array( $vals['src'], $nextLimit );
unset( $srcKeys[$vals['src']] );
// plan to move it back later
$valueMap[$index]['src'] = $nextLimit;
$movedOne = $performNonConflictMoves();
}
if (! $movedOne) {
$performNonConflictMoves(true);
}
} while ( count($valueMap) );
}
return $moveTasks;
};
$oldValues = (array) json_decode('
{
"0": "Not Defined",
"1": "Proposed",
"2": "In Planning",
"3": "In Progress",
"4": "In Testing",
"5": "On Dev",
"6": "On Prod",
"7": "Process",
"8": "On Hold",
"9": "Template",
"10": "Closed",
"11": "SCRs",
"12": "Scrap",
"13": "TE",
"14": "Reports",
"15": "Sysops",
"16": "Specs"
}
');
$newValues = (array) json_decode('
{
"0": "Not Defined",
"1": "Proposed",
"2": "In Planning",
"3": "In Testing",' /* This has been moved from 4 */ .'
"4": "NEW THING",' /* This is a new option */ .'
"5": "On Dev",
"6": "On Prod",
"7": "Process",
'/*"8": "On Hold", this is removed */ .'
"9": "Template",
"10": "Closed",
"11": "SCRs",
"12": "Scrap",
"13": "TE",
'/*"14": "Reports", this is removed, and above shifted down */ .'
"14": "Sysops",
"15": "Specs"
}
');
// If default is in conflict
echo json_encode( $computeDiffOfKeyByValue($oldValues, $newValues, 0) );
/* Prints Out:
[["3",100],["4","3"],["14",101],["15","14"],["16","15"],[100,0],["8",0],[101,0]]
*/
// If default is no-conflict
echo json_encode( $computeDiffOfKeyByValue($oldValues, $newValues) );
/* Prints Out:
[["3",null],["4","3"],["8",null],["14",null],["15","14"],["16","15"]]
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment