Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@am-
Last active October 22, 2017 08:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save am-/2463986 to your computer and use it in GitHub Desktop.
Save am-/2463986 to your computer and use it in GitHub Desktop.
Searching for many strings in one string using Tries
<?php
function mkTrie($elements, $all = true) {
$trie = array('' => false);
if(!$all) {
$tree = array('' => array('parent' => null, 'successors' => array()));
}
foreach($elements as $element) {
$key = '';
for($i = 0, $l = strlen($element); $i < $l; $i++) {
$parent = $key;
$key .= $element[$i];
if(!$all && !in_array($element[$i], $tree[$parent]['successors'])) {
$tree[$parent]['successors'][] = $element[$i];
}
if(!isset($trie[$key])) {
$trie[$key] = false;
if(!$all) {
$tree[$key] = array('parent' => $parent, 'successors' => array());
}
}
}
$trie[$key] = true;
}
if($all) {
return array(&$trie, array());
} else {
return array(&$trie, &$tree);
}
}
function prune(&$trie, &$tree, $key) {
$pos = strlen($key)-1;
while(!$trie[$key] && empty($tree[$key]['successors'])) {
$parent = $tree[$key]['parent'];
if(!empty($parent)) {
unset($tree[$parent]['successors'][array_search($key[$pos], $tree[$parent]['successors'])]);
}
unset($tree[$key]);
unset($trie[$key]);
$key = $parent;
$pos--;
}
}
function search($text, $patterns, $all = true) {
$result = array_fill_keys($patterns, $all ? array() : null);
list($trie, $tree) = mkTrie($patterns, $all);
$list = array();
for($i = 0, $l = strlen($text); $i < $l; $i++) {
$list[] = '';
foreach($list as $index => $key) {
if(!isset($list[$index])) {
continue;
}
$key = $list[$index] .= $text[$i];
if(!isset($trie[$key])) {
unset($list[$index]);
} else if($trie[$key]) {
$position = 1 + $i - strlen($key);
if(!$all) {
$result[$key] = $position;
$trie[$key] = false;
prune($trie, $tree, $key);
} else {
$result[$key][] = $position;
}
}
}
}
return $result;
}
function searchAndPrint($longText, $searchTexts, $all) {
$results = search($longText, $searchTexts, $all);
if(!$all) {
foreach($searchTexts as $text) {
echo "$results[$text]\n";
}
} else {
foreach($searchTexts as $text) {
echo implode(',', $results[$text]) . "\n";
}
$sum = array_sum(array_map('count', $results));
$avg = $sum/count($searchTexts);
echo "Total matches: $sum\n";
echo "Avg. matches: $avg\n";
}
}
@jonasermert
Copy link

Too slow!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment