Created
June 29, 2012 00:35
-
-
Save kyledseever/3014950 to your computer and use it in GitHub Desktop.
Group Strings By Prefix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// returns the position of the first differing character between | |
// $left and $right, or -1 if either is empty | |
function strcmppos($left, $right) { | |
if (empty($left) || empty($right)) { | |
return -1; | |
} | |
$i = 0; | |
while ($left[$i] && $left[$i] == $right[$i]) { | |
$i++; | |
} | |
return $i - 1; | |
} | |
assert(strcmppos('', '') === -1); | |
assert(strcmppos('a', '') === -1); | |
assert(strcmppos('a', 'a') === 0); | |
assert(strcmppos('foo/bar/baz', 'foo/bar/zab') === 7); | |
// returns the part of the string preceding (but not including) the | |
// final directory delimiter, or empty if none are found | |
function truncate_to_last_dir($str) { | |
return (string)substr($str, 0, strrpos($str, '/')); | |
} | |
assert(truncate_to_last_dir('') === ''); | |
assert(truncate_to_last_dir('foo') === ''); | |
assert(truncate_to_last_dir('foo/') === 'foo'); | |
assert(truncate_to_last_dir('foo/bar') === 'foo'); | |
function flatten_array($array) { | |
$merged = array(); | |
foreach ($array as $arr) { | |
$merged = array_merge($merged, $arr); | |
} | |
return $merged; | |
} | |
function group_array_by_directory_prefix($strings) { | |
$groups = array(); | |
$numStrings = count($strings); | |
for ($i = 0; $i < $numStrings; $i++) { | |
for ($j = $i + 1; $j < $numStrings; $j++) { | |
$pos = strcmppos($strings[$i], $strings[$j]); | |
$prefix = truncate_to_last_dir(substr($strings[$i], 0, $pos + 1)); | |
// append to grouping for this prefix. include both strings - this | |
// gives duplicates which we'll merge later | |
$groups[$prefix][] = array($strings[$i], $strings[$j]); | |
} | |
} | |
foreach ($groups as &$group) { | |
// to remove duplicates introduced above | |
$group = array_unique(flatten_array($group)); | |
} | |
return $groups; | |
} | |
function unique_group_by_directory_prefix($strings) { | |
$groups = group_array_by_directory_prefix($strings); | |
// sort descending on key length | |
uksort($groups, function($left, $right) { | |
return strlen($right) - strlen($left); | |
}); | |
// starting from the top, perform the set difference to remove duplication | |
// across groups | |
$current = reset($groups); | |
$unique_groups = array(key($groups) => $current); | |
$next = next($groups); | |
while ($next) { | |
$diff = array_diff($next, $current); | |
$unique_groups[key($groups)] = $diff; | |
$current = $next; | |
$next = next($groups); | |
} | |
return $unique_groups; | |
} | |
$paths = array( | |
'a/b/c', | |
'a/b/d', | |
'a/e', | |
'f', | |
); | |
var_dump(unique_group_by_directory_prefix($paths)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment