Skip to content

Instantly share code, notes, and snippets.

@PerpetualBeta
Created November 3, 2012 23:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PerpetualBeta/4009338 to your computer and use it in GitHub Desktop.
Save PerpetualBeta/4009338 to your computer and use it in GitHub Desktop.
Adds a "fuzzy" search suggestion to the i18n_search plug-in for the GetSimple CMS
<?php
/**
* Fuzzy Search Suggestion Snippet for i18n_search plug-in for the GetSimple CMS
*
* Sometimes a search query produces no matches. This is occasionally due to a
* misspelling in the query terms. This routine will attempt to offer a search
* suggestion to the user when a search results in no matches.
*
* For example:
*
* Sorry, I didn't find anything that matched your query: "typografy behavier"
* Did you mean: typography behaviour
*
* The script will also link the suggested search terms back to the search
* engine. In most cases the suggestion will resolve to a positive query.
* However, it's not (yet) fool-proof and will occasionally link a query that
* itself returns no results.
*
* I have only tested this with English words. As the script uses the metaphone
* algorithm it's likely that it won't work too well for non-English languages.
*
* Still - it's better than nothing! :-)
*
* The script is running on my own website at http://darkblue.sdf.org/ - so you
* can try it out there.
*
* @author Jonathan M. Hollin <darkblue@sdf.lonestar.org>
* @copyright Copyleft: http://www.gnu.org/copyleft/copyleft.html
*/
/**
 * Callback for array_walk()/array_walk_recursive() that truncates a value at
 * its first colon.
 *
 * The value is replaced, in place, by the text that precedes the first ':'.
 * A value without a colon keeps its content (it is only cast to a string).
 *
 * @param mixed $item Value to truncate; modified by reference.
 * @param mixed $key  Key of the value. Unused. It is taken by value because
 *                    array_walk*() supplies keys by value; declaring it by
 *                    reference raises a warning for every element on PHP 8.
 */
function stripColons(&$item, $key = null) {
    $temp = explode(':', (string) $item);
    $item = $temp[0];
}
// Search Suggestions
// Scan the word index and collect, for every query word, the index entries
// that either match it exactly or share its metaphone (phonetic) key.
$candidates = array();
$individualWords = explode(' ', $words);
$suggestions = array();
// Process the word corpus
if ( !file_exists(GSDATAOTHERPATH . I18N_WORD_INDEX) ) create_i18n_search_index();
$corpus = array();
// The phonetic key of a query word does not depend on the index line, so it is
// computed once here instead of once per line.
$phonetics = array();
foreach ($individualWords as $wordKey => $checkWord) {
    $phonetics[$wordKey] = metaphone($checkWord);
}
$f = fopen(GSDATAOTHERPATH . I18N_WORD_INDEX, 'r');
// An unreadable index simply yields no candidates instead of failing in fgets().
if ($f !== false) {
    while ( ($line = fgets($f)) !== false ) {
        // Drop the line terminator so it does not stick to the last token.
        $line = rtrim($line, "\r\n");
        if ($line === '') continue;
        // NOTE(review): presumably each line is "word page:score page:score ..." - confirm against the indexer.
        $items = explode(' ', $line);
        $indexWord = $items[0];
        $indexPhonetic = null; // computed lazily, only if a fuzzy comparison is needed
        $entry = null;         // colon-stripped copy of $items, built on first match
        foreach ($individualWords as $wordKey => $checkWord) {
            $isExact = ($indexWord === $checkWord);
            if (!$isExact) {
                // Looking for possible replacements.
                // An empty phonetic key (e.g. a purely numeric word) carries no
                // information and would "match" every other such word.
                if ($phonetics[$wordKey] === '') continue;
                if ($indexPhonetic === null) $indexPhonetic = metaphone($indexWord);
                // Equal keys is what a Levenshtein distance below 1 means, without
                // the -1 "string too long" result of older PHP versions counting as a match.
                if ($phonetics[$wordKey] !== $indexPhonetic) continue;
            }
            if ($entry === null) {
                $entry = $items;
                array_walk($entry, 'stripColons');
            }
            // Every candidate starts from a fresh copy so that 'hit', 'replace' and
            // 'confidence' of one query word never leak into the candidate of another.
            $candidate = $entry;
            $candidate['pattern'] = $checkWord;
            if ($isExact) {
                // Exact match! No need to look for replacements on later lines.
                $candidate['hit'] = 1;
                unset($individualWords[$wordKey]);
            } else {
                $candidate['replace'] = $checkWord . '|' . $entry[0];
                similar_text($entry[0], $checkWord, $candidate['confidence']);
            }
            $candidates[] = $candidate;
        }
    }
    fclose($f);
}
unset($phonetics);
// Restore $individualWords (exact hits were removed from it during the index scan)
$individualWords = explode(' ', $words);
// Clean up $candidates: when a query word was found verbatim in the index,
// every other candidate recorded for that same word is discarded.
foreach (array_keys($candidates) as $k) {
    // Skip entries that an earlier exact hit has already discarded; otherwise two
    // exact hits for the same word would delete each other and leave none.
    if (!isset($candidates[$k])) continue;
    if (!array_key_exists('hit', $candidates[$k])) continue;
    $key = $candidates[$k][0];
    foreach ($candidates as $y => $z) {
        if ( ($y !== $k) && ($z['pattern'] === $key) ) unset($candidates[$y]);
    }
}
// The bookkeeping keys are no longer needed by the following steps.
foreach ($candidates as $k => $v) {
    unset($candidates[$k]['pattern'], $candidates[$k]['hit']);
}
// Re-index so the candidates can be addressed as 0..n-1.
$candidates = array_values($candidates);
// Find match combinations that actually resolve to a page.
// A single-word query keeps every candidate. A multi-word query keeps only
// candidates that share at least one value with another candidate.
// NOTE(review): array_intersect() compares every value of the two candidate
// arrays, not only page identifiers - confirm this is intended.
$result = array();
$wordTotal = count($individualWords);
$candidateTotal = count($candidates);
if ($wordTotal === 1) {
    $result = $candidates;
} elseif ($candidateTotal >= 2) {
    // Visit every unordered pair exactly once ($j always runs ahead of $i).
    for ($i = 0; $i < $candidateTotal; $i++) {
        for ($j = $i + 1; $j < $candidateTotal; $j++) {
            $shared = array_intersect($candidates[$i], $candidates[$j]);
            if (count($shared) === 0) {
                continue;
            }
            // Keep both members of the pair, each only once.
            $pair = array($candidates[$i], $candidates[$j]);
            foreach ($pair as $member) {
                if (!in_array($member, $result)) {
                    $result[] = $member;
                }
            }
        }
    }
}
unset($candidates, $wordTotal, $candidateTotal, $shared, $pair, $member);
// Compute the best suggestion
$sentence = $replacements = array();
if (count($result)) {
    // For every misspelt query word keep the replacement with the highest
    // similarity score.
    foreach ($result as $v) {
        // Entries without a 'replace' key (e.g. exact hits) offer nothing to substitute.
        if (!isset($v['replace'], $v['confidence'])) continue;
        $replace = explode('|', $v['replace']);
        if (count($replace) !== 2) continue;
        list($original, $suggested) = $replace;
        // Compare this candidate's own score with the one stored for the same word.
        if ( !isset($replacements[$original]) || ($v['confidence'] > $replacements[$original]['confidence']) ) {
            $replacements[$original] = array(
                'confidence' => $v['confidence'],
                'with'       => $suggested,
            );
        }
    }
    unset($result);
    // Render the suggestion
    $queryTerms = array();
    // Without a single replacement the "suggestion" would be the query that
    // has just failed, so nothing is rendered in that case.
    if (count($replacements)) {
        foreach ($individualWords as $word) {
            $shown = isset($replacements[$word]) ? $replacements[$word]['with'] : $word;
            // The words originate from the search query, so they are URL-encoded
            // for the link and HTML-escaped for display.
            // NOTE(review): double_encode is off in case $words arrives already escaped - confirm upstream handling.
            $queryTerms[] = urlencode($shown);
            $escaped = htmlspecialchars($shown, ENT_QUOTES, 'UTF-8', false);
            if (isset($replacements[$word])) {
                $sentence[] = '<span style="font-style: italic;">' . $escaped . '</span>';
            } else {
                $sentence[] = $escaped;
            }
        }
    }
    unset($individualWords);
    unset($replacements);
    if (count($sentence)) echo '<p class="search-no-results">Did you mean: <a href="/search?words=' . implode('+', $queryTerms) . '">' . implode(' ', $sentence) . '</a></p>';
    unset($queryTerms);
}
?>
@PerpetualBeta
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment