Skip to content

Instantly share code, notes, and snippets.

@greenido
Last active April 23, 2019 20:20
Show Gist options
  • Save greenido/875ea8e2626b864384efc932bfc52884 to your computer and use it in GitHub Desktop.
Save greenido/875ea8e2626b864384efc932bfc52884 to your computer and use it in GitHub Desktop.
Words Frequency Counter - Simple and fast
<?php
/**
* @author: Ido Green
* @date 6/28/2015
* @desc: Analyze the words from our firebase DB.
* Steps:
* 1. Fetch text from the DB
* 2. Split into words
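* 3. Drop short words (fewer than 5 characters); stopword removal is not implemented yet
* 4. Determine word frequency (density is not computed yet)
* 5. Determine word prominence (not implemented yet)
* 6. Determine word containers (not implemented yet)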
*/
/**
 * Builds a word-frequency report from the titles of the items stored in the DB.
 *
 * The report is written to standard output: a summary line with the total
 * word, keyword and unique-keyword counts, followed by the most frequent
 * keywords, one "count , word" row per keyword.
 */
class AnalyzeWords {
    /** Minimum length, in characters, a word needs in order to count as a keyword. */
    const MIN_KEYWORD_LENGTH = 5;

    /** Maximum number of keyword rows printed in the report. */
    const MAX_REPORTED_WORDS = 100;

    /** Data source; only its getItems() method is used by this class. */
    private $db;

    /**
     * Ctor
     */
    public function __construct() {
        $this->db = new DB();
    }

    /**
     * Fetches the item titles, counts the keywords and prints the report.
     *
     * @return void
     */
    public function analyze() {
        $words = $this->splitIntoWords($this->collectTitles());
        $keywords = $this->filterKeywords($words);

        // One entry per distinct keyword, so its size is also the unique-keyword total.
        $wordCounts = array_count_values($keywords);
        // arsort() keeps every key attached to its count; array_multisort() would
        // re-index numeric keys and lose all-digit words such as "12345".
        arsort($wordCounts);

        echo "Words: " . count($words) . " Keywords: " . count($keywords) . " unique Words: " . count($wordCounts);
        echo "\n==\n\n-- Unique Word Counts -- \n\nFreq , Word \n";

        $topWords = array_slice($wordCounts, 0, self::MAX_REPORTED_WORDS, true);
        foreach ($topWords as $word => $count) {
            // Column order follows the "Freq , Word" header printed above.
            echo "$count , $word \n";
        }
    }

    /**
     * Joins the titles of all items into a single space-separated string.
     *
     * Titles are joined with a space so that the last word of one title does
     * not fuse with the first word of the next one. Items without a scalar
     * "title" property are skipped.
     *
     * @return string
     */
    private function collectTitles() {
        $items = $this->db->getItems();
        if (!is_array($items) && !is_object($items)) {
            // Nothing iterable came back (e.g. null on an empty DB).
            return '';
        }

        $titles = array();
        foreach ($items as $item) {
            if (isset($item->title) && is_scalar($item->title)) {
                $titles[] = (string) $item->title;
            }
        }
        return implode(' ', $titles);
    }

    /**
     * Splits text on runs of whitespace, discarding empty tokens.
     *
     * @param string $text
     * @return array list of words; empty when $text has no words
     */
    private function splitIntoWords($text) {
        $words = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // The /u modifier rejects malformed UTF-8; retry byte-wise rather than lose the text.
            $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }
        return $words === false ? array() : $words;
    }

    /**
     * Keeps only the words that are at least MIN_KEYWORD_LENGTH characters long.
     *
     * @param array $words
     * @return array
     */
    private function filterKeywords(array $words) {
        $keywords = array();
        foreach ($words as $word) {
            // Length is measured in characters, not bytes.
            // NOTE(review): assumes the titles are UTF-8 encoded; confirm against the DB layer.
            if (mb_strlen($word, 'UTF-8') >= self::MIN_KEYWORD_LENGTH) {
                $keywords[] = $word;
            }
        }
        return $keywords;
    }
}
//
// Entry point: create the analyzer and print the word-frequency report.
//
$analyzer = new AnalyzeWords();
$analyzer->analyze();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment