Skip to content

Instantly share code, notes, and snippets.

@terwanerik
Created March 26, 2019 11:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save terwanerik/d09f2557ca3c28c322451e96a341d9d6 to your computer and use it in GitHub Desktop.
Save terwanerik/d09f2557ca3c28c322451e96a341d9d6 to your computer and use it in GitHub Desktop.
<?php
/**
* Made by Erik Terwan
* erik@erikterwan.com
* https://erikterwan.com
*
* All rights reserved
*
* Created on 2019-03-26
*/
class StringHelper
{
    /**
     * Score how well $word matches $string.
     *
     * The characters of $word are looked up in $string from left to right,
     * case-insensitively, each search starting just after the previous hit.
     * Every hit earns points:
     *   - 0.7 when it sits exactly where the search started (a consecutive
     *     letter, or the very first letter of $string);
     *   - otherwise 0.1, plus 0.8 when the preceding character is a space
     *     (acronym bonus);
     *   - plus 0.1 when the case matches as well.
     * The summed points are averaged over both lengths, divided by the
     * accumulated fuzziness penalty, and get a 0.15 bonus when both values
     * start with the same letter and the score is still below 0.85.
     *
     * Lengths and offsets are byte based (strlen/strtolower/strpos), so
     * multi-byte characters are compared byte by byte.
     *
     * @param string $string The text that is being searched.
     * @param string $word The query to score against $string.
     * @param null|float $fuzziness Falsy (null/0) for strict matching: any
     *        character of $word that cannot be found makes the result 0.
     *        Otherwise each missing character adds (1 - $fuzziness) to the
     *        divisor of the final score instead of aborting.
     * @return float|int Integer 1 when both values are identical, integer 0
     *         when either value is empty or (in strict mode) a character is
     *         missing, otherwise the computed float score.
     */
    public static function score($string, $word, $fuzziness = null)
    {
        // Work on real strings: this keeps the offset access below valid and
        // lets us compare with === instead of ==, which would treat numeric
        // strings such as "1e3" and "1000" as a perfect match.
        $string = (string) $string;
        $word = (string) $word;

        // Identical values are a perfect match.
        if ($string === $word) {
            return 1;
        }

        // Nothing can match when either side is empty. Bailing out here also
        // protects the division by the string length and the first-character
        // lookup at the end of this method.
        if ($word === '' || $string === '') {
            return 0;
        }

        $lString = strtolower($string);
        $lWord = strtolower($word);
        $strLength = strlen($string);
        $wordLength = strlen($word);

        // null means strict mode; otherwise the penalty added per missing char.
        $fuzzyFactor = $fuzziness ? 1 - $fuzziness : null;

        $runningScore = 0;
        $fuzzies = 1;
        $startAt = 0;

        for ($i = 0; $i < $wordLength; $i++) {
            $idxOf = strpos($lString, $lWord[$i], $startAt);

            if ($idxOf === false) {
                if ($fuzzyFactor === null) {
                    // Strict mode: one missing character means no match at all.
                    return 0;
                }

                // Fuzzy mode: remember the miss and keep searching from the
                // same position with the next character.
                $fuzzies += $fuzzyFactor;
                continue;
            }

            if ($startAt === $idxOf) {
                // Consecutive letter & start-of-string bonus.
                $charScore = 0.7;
            } else {
                $charScore = 0.1;

                // Acronym bonus: typing the first character of an acronym is
                // as if it was preceded by two perfect character matches.
                // $idxOf is always > $startAt >= 0 here, so $idxOf - 1 is a
                // valid offset.
                if ($string[$idxOf - 1] === ' ') {
                    $charScore += 0.8;
                }
            }

            // Same case bonus.
            if ($string[$idxOf] === $word[$i]) {
                $charScore += 0.1;
            }

            $runningScore += $charScore;
            // The next character has to be found after this one.
            $startAt = $idxOf + 1;
        }

        // Averaging over both lengths reduces the penalty for longer strings.
        $finalScore = 0.5 * ($runningScore / $strLength + $runningScore / $wordLength) / $fuzzies;

        // Bonus when both values start with the same letter.
        if (($lWord[0] === $lString[0]) && ($finalScore < 0.85)) {
            $finalScore += 0.15;
        }

        return $finalScore;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment