Skip to content

Instantly share code, notes, and snippets.

@mavmedved
Created February 14, 2019 16:39
Show Gist options
  • Save mavmedved/28412141545941a4c3debe00d29f1cf9 to your computer and use it in GitHub Desktop.
Save mavmedved/28412141545941a4c3debe00d29f1cf9 to your computer and use it in GitHub Desktop.
<?php
/**
 * Returns the five most frequent words of a text together with their counts.
 *
 * Processing steps:
 *  - tags are removed with strip_tags() and ASCII digits are deleted;
 *  - the text is split on every run of characters that are not Unicode
 *    letters or combining marks (whitespace, any punctuation, symbols);
 *  - one-character tokens are not treated as words and are skipped;
 *  - the remaining words are lower-cased, so counting is case-insensitive.
 *
 * @param string $text UTF-8 encoded text.
 * @return array<string, int> Up to five "word => occurrences" pairs ordered by
 *                            descending count. Empty when the text contains no
 *                            words or is not valid UTF-8.
 */
function getTopFiveWords(string $text) : array
{
    // The encoding is passed explicitly to every mb_* call rather than set via
    // mb_internal_encoding(), which would change a process-wide setting.
    $encoding = 'UTF-8';

    // (!) optional, just in case the text contains markup.
    $text = strip_tags($text);
    $text = preg_replace('/[0-9]+/', '', $text);
    if ($text === null || $text === '') {
        return [];
    }

    // Split on anything that is not a letter/mark. An explicit list of ASCII
    // symbols misses quotes, slashes, backslashes and non-ASCII punctuation.
    $tokens = preg_split('/[^\p{L}\p{M}]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        // With the "u" modifier PCRE rejects malformed UTF-8 input.
        return [];
    }

    $wordCounts = [];
    foreach ($tokens as $token) {
        // one symbol != word (optional)
        if (mb_strlen($token, $encoding) <= 1) {
            continue;
        }
        $word = mb_strtolower($token, $encoding);
        $wordCounts[$word] = ($wordCounts[$word] ?? 0) + 1;
    }

    // Sort by descending count and take from the front. Slicing from the front
    // avoids a negative offset when fewer than five distinct words exist.
    // Equal counts keep first-appearance order on PHP 8.0+ (stable sort).
    arsort($wordCounts, SORT_NUMERIC);

    return array_slice($wordCounts, 0, 5, true);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment