Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Adds a "fuzzy" search suggestion to the i18n_search plug-in for the GetSimple CMS
<?php
/**
* Fuzzy Search Suggestion Snippet for i18n_search plug-in for the GetSimple CMS
*
* Sometimes a search query produces no matches. This is occasionally due to a
* misspelling in the query terms. This routine will attempt to offer a search
* suggestion to the user when a search results in no matches.
*
* For example:
*
* Sorry, I didn't find anything that matched your query: "typografy behavier"
* Did you mean: typography behaviour
*
* The script will also link the suggested search terms back to the search
* engine. In most cases the suggestion will resolve to a positive query.
* However, it's not (yet) fool-proof and will occasionally link a query that
* itself returns no results.
*
* I have only tested this with English words. As the script uses the metaphone
* algorithm it's likely that it won't work too well for non-English languages.
*
* Still - it's better than nothing! :-)
*
* The script is running on my own website at http://darkblue.sdf.org/ - so you
* can try it out there.
*
* @author Jonathan M. Hollin <darkblue@sdf.lonestar.org>
* @copyright Copyleft: http://www.gnu.org/copyleft/copyleft.html
*/
/**
 * Callback for array_walk()/array_walk_recursive(): truncates a value at its
 * first colon, so "about:7" becomes "about". Values without a colon are left
 * unchanged (apart from being cast to string).
 *
 * Only the value is taken by reference. array_walk*() hands the key over by
 * value, and declaring it by reference makes newer PHP versions emit a
 * "must be passed by reference, value given" warning for every element.
 *
 * @param mixed $item Array value, modified in place.
 * @param mixed $key  Array key of the value; unused.
 */
function stripColons(&$item, $key) {
    $temp = explode(':', (string) $item);
    $item = $temp[0];
}
// Search Suggestions
//
// Builds a "Did you mean: ..." link for the query held in $words (provided by
// the surrounding search template) by comparing every query word against the
// i18n_search word index.
//
// NOTE(review): each index line is assumed to look like
// "word page:score page:score ..." -- confirm against the index writer.

// Ignore empty tokens produced by repeated/leading/trailing spaces; they would
// otherwise distort the single-word check below.
$individualWords = array_values(array_filter(explode(' ', $words), 'strlen'));
$candidates = array();

// Process the word corpus
if ( !file_exists(GSDATAOTHERPATH . I18N_WORD_INDEX) ) create_i18n_search_index();
$f = fopen(GSDATAOTHERPATH . I18N_WORD_INDEX, 'r');
if ($f !== false) {
    // Words that already found an exact match are not tested against later lines.
    $pendingWords = $individualWords;
    while ( ($line = fgets($f)) !== false ) {
        $tokens = explode(' ', rtrim($line, "\r\n"));
        array_walk($tokens, 'stripColons');
        $indexWord = array_shift($tokens);
        if ($indexWord === '') continue;
        $pages = array_values(array_filter($tokens, 'strlen'));
        foreach ($pendingWords as $wordKey => $checkWord) {
            // Every candidate is built from scratch so that flags belonging to
            // one query word can never leak into the candidate of another.
            if ($indexWord === $checkWord) {
                // Exact match!
                $candidates[] = array(
                    'word'    => $indexWord,
                    'pages'   => $pages,
                    'pattern' => $checkWord,
                    'hit'     => true,
                );
                unset($pendingWords[$wordKey]);
            } elseif (levenshtein(metaphone($checkWord), metaphone($indexWord)) === 0) {
                // Same metaphone key: a possible replacement. "=== 0" (rather
                // than "< 1") keeps an error return of -1 from counting as a match.
                similar_text($indexWord, $checkWord, $similarity);
                $candidates[] = array(
                    'word'       => $indexWord,
                    'pages'      => $pages,
                    'pattern'    => $checkWord,
                    'hit'        => false,
                    'confidence' => $similarity,
                );
            }
        }
    }
    fclose($f);
    unset($pendingWords);
}

// Clean up $candidates: a query word that exists verbatim in the index needs
// no replacement, so drop its fuzzy candidates and keep one exact hit per word.
$exactWords = array();
foreach ($candidates as $candidate) {
    if ($candidate['hit']) $exactWords[$candidate['word']] = true;
}
$keptCandidates = array();
$seenHits = array();
foreach ($candidates as $candidate) {
    if ($candidate['hit']) {
        if (isset($seenHits[$candidate['word']])) continue;
        $seenHits[$candidate['word']] = true;
    } elseif (isset($exactWords[$candidate['pattern']])) {
        continue;
    }
    $keptCandidates[] = $candidate;
}
$candidates = $keptCandidates;
unset($keptCandidates, $exactWords, $seenHits);

// Find match combinations that actually resolve to a page
$result = array();
$candidateCount = count($candidates);
if ( ($candidateCount >= 2) && (count($individualWords) !== 1) ) {
    $picked = array();
    // Compare every unordered pair of candidates exactly once.
    for ($i = 0; $i < $candidateCount; $i++) {
        for ($j = $i + 1; $j < $candidateCount; $j++) {
            // Only the page lists are compared, so equal words or equal
            // confidence values cannot fake a shared page.
            if (count(array_intersect($candidates[$i]['pages'], $candidates[$j]['pages'])) !== 0) {
                foreach (array($i, $j) as $index) {
                    if (!isset($picked[$index])) {
                        $picked[$index] = true;
                        $result[] = $candidates[$index];
                    }
                }
            }
        }
    }
    unset($picked);
} else {
    if (count($individualWords) === 1) $result = $candidates;
}
unset($candidates, $candidateCount);

// Compute the best suggestion: per query word, keep the replacement with the
// highest similarity (the first one found wins a tie).
$replacements = array();
if (count($result)) {
    foreach ($result as $candidate) {
        if ($candidate['hit']) continue;
        $pattern = $candidate['pattern'];
        if ( !isset($replacements[$pattern]) || ($candidate['confidence'] > $replacements[$pattern]['confidence']) ) {
            $replacements[$pattern] = array(
                'confidence' => $candidate['confidence'],
                'with'       => $candidate['word'],
            );
        }
    }
    unset($result);
    // Render the suggestion. Query words come from the visitor, so everything
    // is escaped for the HTML body and URL-encoded for the link target.
    $plainWords = $sentence = array();
    foreach ($individualWords as $word) {
        if (isset($replacements[$word])) {
            $plainWords[] = $replacements[$word]['with'];
            $sentence[] = '<span style="font-style: italic;">' . htmlspecialchars($replacements[$word]['with'], ENT_QUOTES, 'UTF-8') . '</span>';
        } else {
            $plainWords[] = $word;
            $sentence[] = htmlspecialchars($word, ENT_QUOTES, 'UTF-8');
        }
    }
    unset($individualWords);
    unset($replacements);
    // urlencode() turns the separating spaces into "+".
    if (count($sentence)) echo '<p class="search-no-results">Did you mean: <a href="/search?words=' . urlencode(implode(' ', $plainWords)) . '">' . implode(' ', $sentence) . '</a></p>';
    unset($plainWords);
}
?>
@PerpetualBeta

This comment has been minimized.

Copy link
Owner Author

@PerpetualBeta PerpetualBeta commented Nov 4, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.