Skip to content

Instantly share code, notes, and snippets.

@kyledseever
Created June 29, 2012 00:35
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kyledseever/3014950 to your computer and use it in GitHub Desktop.
Save kyledseever/3014950 to your computer and use it in GitHub Desktop.
Group Strings By Prefix
<?php
// returns the position of the first differing character between
// $left and $right, or -1 if either is empty
function strcmppos($left, $right) {
if (empty($left) || empty($right)) {
return -1;
}
$i = 0;
while ($left[$i] && $left[$i] == $right[$i]) {
$i++;
}
return $i - 1;
}
assert(strcmppos('', '') === -1);
assert(strcmppos('a', '') === -1);
assert(strcmppos('a', 'a') === 0);
assert(strcmppos('foo/bar/baz', 'foo/bar/zab') === 7);
// returns the part of the string preceding (but not including) the
// final directory delimiter, or empty if none are found
function truncate_to_last_dir($str) {
return (string)substr($str, 0, strrpos($str, '/'));
}
assert(truncate_to_last_dir('') === '');
assert(truncate_to_last_dir('foo') === '');
assert(truncate_to_last_dir('foo/') === 'foo');
assert(truncate_to_last_dir('foo/bar') === 'foo');
function flatten_array($array) {
$merged = array();
foreach ($array as $arr) {
$merged = array_merge($merged, $arr);
}
return $merged;
}
function group_array_by_directory_prefix($strings) {
$groups = array();
$numStrings = count($strings);
for ($i = 0; $i < $numStrings; $i++) {
for ($j = $i + 1; $j < $numStrings; $j++) {
$pos = strcmppos($strings[$i], $strings[$j]);
$prefix = truncate_to_last_dir(substr($strings[$i], 0, $pos + 1));
// append to grouping for this prefix. include both strings - this
// gives duplicates which we'll merge later
$groups[$prefix][] = array($strings[$i], $strings[$j]);
}
}
foreach ($groups as &$group) {
// to remove duplicates introduced above
$group = array_unique(flatten_array($group));
}
return $groups;
}
function unique_group_by_directory_prefix($strings) {
$groups = group_array_by_directory_prefix($strings);
// sort descending on key length
uksort($groups, function($left, $right) {
return strlen($right) - strlen($left);
});
// starting from the top, perform the set difference to remove duplication
// across groups
$current = reset($groups);
$unique_groups = array(key($groups) => $current);
$next = next($groups);
while ($next) {
$diff = array_diff($next, $current);
$unique_groups[key($groups)] = $diff;
$current = $next;
$next = next($groups);
}
return $unique_groups;
}
$paths = array(
'a/b/c',
'a/b/d',
'a/e',
'f',
);
var_dump(unique_group_by_directory_prefix($paths));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment