Skip to content

Instantly share code, notes, and snippets.

@Geczy
Created May 8, 2015 00:59
Show Gist options
  • Save Geczy/58ade6af220699c4fa1b to your computer and use it in GitHub Desktop.
Save Geczy/58ade6af220699c4fa1b to your computer and use it in GitHub Desktop.
/**
 * Extract the most frequent significant words from a piece of text.
 *
 * The text is lower-cased, reduced to ASCII letters, spaces and dashes, and
 * split on spaces. Words of three characters or fewer, and words found in
 * $this->stopWords, are ignored.
 *
 * NOTE(review): this reads $this->stopWords, so it has to be declared as a
 * method of a class providing that property. The property is presumably a
 * list of lower-case strings - confirm against the owning class.
 *
 * @param string $string    Text to analyse.
 * @param int    $max_count Maximum number of keywords to return.
 *
 * @return array Map of word => number of occurrences, most frequent first.
 */
function extract_common_words($string, $max_count = 10)
{
    // Collapse every run of whitespace (spaces, tabs, newlines) into a single
    // space. This must happen before the character filter below, which would
    // otherwise delete tabs/newlines outright and glue neighbouring words
    // together.
    $string = (string) preg_replace('/\s+/', ' ', (string) $string);
    // Keep only ASCII letters, spaces and dashes.
    $string = (string) preg_replace('/[^a-zA-Z -]/', '', $string);
    $string = strtolower($string);

    $matched = [];
    foreach (explode(' ', $string) as $word) {
        // Strip dangling dashes ("word-", "--") so they neither form bogus
        // keywords nor split the count of a real word; interior dashes, as in
        // "well-known", are preserved.
        $word = trim($word, '-');

        // The length check also discards the empty tokens left behind when
        // the character filter removes a whole word (e.g. a number).
        if (strlen($word) > 3 && !in_array($word, $this->stopWords, true)) {
            $matched[] = $word;
        }
    }

    $frequency = array_count_values($matched);
    arsort($frequency); // highest count first, keys (the words) preserved

    return array_slice($frequency, 0, $max_count);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment