Last active
December 20, 2015 21:18
-
-
Save homeyjd/6196699 to your computer and use it in GitHub Desktop.
Compute the synchronization steps between two key-value pairs. Consider:
* no key can conflict. That is, if 4 is deleted, and 5 is moved to 4, all currently-tagged 4's must be moved OUT before 5 can be moved, else you get key conflicts.
* delete operations should be reduced to default, but since default might be equal to a changing ID, the delet…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Compute the tasks that must occur to convert a key-value table specified by $src | |
* into a new key-value $dest where the VALUES are the important tags and the IDs | |
* are simply placeholders. | |
* | |
* This is helpful with user-provided data where keys are stored in the database | |
* (especially as integers) but user-provided values (labels for records, etc.) | |
* are the actual identifiers for the data. | |
* | |
* The resulting array will contain data with possibly integer or string type. All | |
* incoming keys are normalized to strings, but upper-limit moves are used with | |
* un-casted integers, and so will come out as numbers. If you are sure your move | |
* operations will always be integers, then forcibly cast the results of this | |
* function to (int) before creating your queries. | |
* | |
* @param array $src Key-value source | |
* @param array $dest Key-value destination | |
* @return array An array of tasks that will translate stored data to the new schema | |
* @author Jesse Decker | |
* @version 0.3 | |
*/ | |
$computeDiffOfKeyByValue = function (array $src, array $dest, $default = null) { | |
$moveTasks = array(); | |
$valueMap = array(); | |
$srcKeys = array(); | |
// 1) Compare source and destination | |
foreach ( $src as $key => $value ) { | |
$key = '' . $key; | |
// If the value will exist when done (returns first match) | |
// This isn't the fastest, but ensures dealing with possible duplicate values | |
if (($index = array_search($value, $dest, true)) !== false) { | |
// If it's changing IDs | |
$index = '' . $index; | |
if ($index !== $key) { | |
$valueMap[] = array ( | |
'src' => $key, | |
'dest' => $index | |
); | |
$srcKeys[$key] = $index; | |
} else { | |
$srcKeys[$key] = $key; | |
} | |
} | |
// Else it will be deleted | |
else { | |
$valueMap[] = array ('src' => $key, 'dest' => $default, 'delete' => true); | |
$srcKeys[$key] = $default; | |
} | |
} | |
if ( count($valueMap) ) { | |
// If default is a non-conflict key, then remove the delete flag so we can optimize | |
if (!isset($srcKeys[$default])) { | |
foreach ( $valueMap as &$vals ) { | |
if (isset($vals['delete'])) { | |
unset( $vals['delete'] ); | |
} | |
} | |
} | |
$performNonConflictMoves = function ($moveDeletes = false) use (&$moveTasks, &$valueMap, &$srcKeys) { | |
// For all changing values, if there is no conflict, move the key | |
// Deletes could potentially have key collisions because the default might | |
// be a pre-existing key. So we remove the collision check for delete operations. | |
$foundOne = false; | |
do { | |
$found = false; | |
foreach ( $valueMap as $i => $vals ) { | |
if ($moveDeletes or (!isset($srcKeys[$vals['dest']]) and !isset($vals['delete']))) { | |
$found = true; | |
$foundOne = true; | |
$moveTasks[] = array ($vals['src'], $vals['dest']); | |
unset( $srcKeys[$vals ['src']] ); | |
$srcKeys[$vals['dest']] = null; | |
unset( $valueMap[$i] ); | |
} | |
} | |
} while ($found); | |
return $foundOne; | |
}; | |
// 2) Perform non-conflict moves first | |
$performNonConflictMoves(); | |
} | |
// If we still have tasks to do | |
if ( count($valueMap) ) { | |
// we need to find an unused key. because it might be a string or an integer, | |
// we use a number that won't match the string or integer versions. | |
$limit = 99; // signed tinyint only hits 124, so can't start too high | |
$getNextLimit = function () use (&$limit, &$srcKeys) { | |
// this is a horrible loop, but I must find the next non-conflict index | |
while ( true ) { | |
$limit ++; | |
if (! isset( $srcKeys[(string) $limit] )) { | |
return $limit; | |
} | |
} | |
}; | |
// 3) For all remaining tasks, find a task that is in conflict, and move it to a non-conflict ID | |
do { | |
// foreach will generate an iterator which will copy-on-write | |
// we don't want that | |
$movedOne = false; | |
$found = false; | |
foreach ( $valueMap as $index => $vals ) { | |
// find out if changing this value's src will make a difference | |
foreach ( $valueMap as $dest ) { | |
if ($dest['dest'] === $vals['src']) { | |
$found = true; | |
break 2; | |
} | |
} | |
} | |
// if we found a conflict | |
if ($found) { | |
$nextLimit = $getNextLimit(); | |
// move the current src to an unused ID | |
$moveTasks[] = array( $vals['src'], $nextLimit ); | |
unset( $srcKeys[$vals['src']] ); | |
// plan to move it back later | |
$valueMap[$index]['src'] = $nextLimit; | |
$movedOne = $performNonConflictMoves(); | |
} | |
if (! $movedOne) { | |
$performNonConflictMoves(true); | |
} | |
} while ( count($valueMap) ); | |
} | |
return $moveTasks; | |
}; | |
$oldValues = (array) json_decode(' | |
{ | |
"0": "Not Defined", | |
"1": "Proposed", | |
"2": "In Planning", | |
"3": "In Progress", | |
"4": "In Testing", | |
"5": "On Dev", | |
"6": "On Prod", | |
"7": "Process", | |
"8": "On Hold", | |
"9": "Template", | |
"10": "Closed", | |
"11": "SCRs", | |
"12": "Scrap", | |
"13": "TE", | |
"14": "Reports", | |
"15": "Sysops", | |
"16": "Specs" | |
} | |
'); | |
$newValues = (array) json_decode(' | |
{ | |
"0": "Not Defined", | |
"1": "Proposed", | |
"2": "In Planning", | |
"3": "In Testing",' /* This has been moved from 4 */ .' | |
"4": "NEW THING",' /* This is a new option */ .' | |
"5": "On Dev", | |
"6": "On Prod", | |
"7": "Process", | |
'/*"8": "On Hold", this is removed */ .' | |
"9": "Template", | |
"10": "Closed", | |
"11": "SCRs", | |
"12": "Scrap", | |
"13": "TE", | |
'/*"14": "Reports", this is removed, and above shifted down */ .' | |
"14": "Sysops", | |
"15": "Specs" | |
} | |
'); | |
// If default is in conflict | |
echo json_encode( $computeDiffOfKeyByValue($oldValues, $newValues, 0) ); | |
/* Prints Out: | |
[["3",100],["4","3"],["14",101],["15","14"],["16","15"],[100,0],["8",0],[101,0]] | |
*/ | |
// If default is no-conflict | |
echo json_encode( $computeDiffOfKeyByValue($oldValues, $newValues) ); | |
/* Prints Out: | |
[["3",null],["4","3"],["8",null],["14",null],["15","14"],["16","15"]] | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment