Skip to content

Instantly share code, notes, and snippets.

@MwirabuaTimothy
Last active June 9, 2016 15:27
Show Gist options
  • Save MwirabuaTimothy/1e888896844adc7c5e1c5af15a98b1a0 to your computer and use it in GitHub Desktop.
Save MwirabuaTimothy/1e888896844adc7c5e1c5af15a98b1a0 to your computer and use it in GitHub Desktop.
php - sorting an array of words by their relevance to a keyword
<?php
$query = 'ma';
$names = ['Kevin', 'Michael', 'Joseph', 'Mark', 'Edward', 'Velma', 'William', 'Mary', 'Kimani'];
// $target = ['Mark', 'Mary', 'Michael', 'Kimani', 'Velma', 'William', 'Edward', 'Joseph', 'Kevin'];
// $result = ['Mark', 'Mary', 'Kimani', 'Michael', 'Velma', 'William', 'Edward', 'Kevin', 'Joseph'];
// the slight difference is because the longer words have more irrelevant characters
function weights($query) { // return weight of letters determined by their order of appearance
$query = strtolower($query); // eliminate case sensitivity
$length = strlen($query);
$weights = [];
foreach (str_split($query) as $letter) { // loop through query
$weights[$letter] = $length*100;
$length--; // deduct length
}
return $weights;
}
// return weights($query); // ['m'=>200,'a'=>100];
function wordWeight($word, $query_weights) { // return weight of letters determined by their order of appearance
$word = strtolower($word); // eliminate case sensitivity
$weights = []; // relevance weights
$weight = 0; // relevance weight of word
$mismatches = 0;
$letters = str_split($word);
$unique = ''; // track duplicate occurences
foreach ($letters as $letter) { // loop through query
if( !strpos($unique, $letter) ){ // if letter has not been tracked
$unique .= $letter; //track letter
if(isset($query_weights[$letter]) ){ // if letter exists in query
// increment weight of word
$weight += $query_weights[$letter];
$weights[] = $query_weights[$letter];
// subtract number of characters before it appears
$weight -= strpos($word, $letter);
}
else{
$mismatches++; // count mismatches
$weights[] = '-';
}
}
else{
$weight -= 1; // rank down
$weights[] = '-';
}
}
// subtract number of missmatches
$weight -= $mismatches;
return $weight;
// return $weights;
// return $unique;
}
function relevance($words, $query) {
$word_weights = [];
$query_weights = weights($query);
foreach ($words as $word) {
$word_weights[$word] = wordWeight($word, $query_weights);
}
return $word_weights;
}
$weighted = relevance($names, $query);
// return $weighted;
// multisort: first by values(descending) then by keys (alphabetical)
array_multisort(array_values($weighted), SORT_DESC, array_keys($weighted), SORT_ASC, $weighted);
$sorted = array_keys($weighted);
return $sorted;
@MwirabuaTimothy
Copy link
Author

MwirabuaTimothy commented Jun 9, 2016

algorithm / pseudocode

foreach query letter {
    weight of letter = reverse index * 100
}

foreach word {

    foreach letter
        if letter appears in query
            increment weight of letter minus number of characters before it appears
        else
            count missmatches
    }
    - subtract number of missmatches
}


sort words by the final weight
sort words with same weight alphabetically 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment