Created
February 15, 2014 06:45
-
-
Save StoneCypher/9015456 to your computer and use it in GitHub Desktop.
Explanation of how to count words in PHP (MIT License)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><body>
<?php
// Counts how often each space-separated word occurs in titles.csv and prints
// the tally as a preformatted dump. Written one concept per line on purpose,
// so it can be read as a walkthrough.

// config
$shouldToLower = false; // true = fold case first, so "Word" and "word" share one count
// end config, editing below should be unnecessary

/**
 * Tally how many times each word occurs in a block of text.
 *
 * Lines may end in DOS (\r\n), old Mac (\r) or Unix (\n) returns. Within a
 * line, words are separated by single spaces; empty tokens (blank lines,
 * doubled spaces) are skipped rather than counted as a word.
 *
 * @param string $text          raw text to analyse
 * @param bool   $shouldToLower when true, the text is lowercased before counting
 * @return array map of word => number of occurrences, in order of first appearance
 */
function countWords($text, $shouldToLower)
{
    if ($shouldToLower) {
        // strtolower() is byte-wise; prefer the multibyte version when the
        // mbstring extension is loaded so non-ASCII letters are folded too.
        $text = function_exists('mb_strtolower')
            ? mb_strtolower($text, 'UTF-8')
            : strtolower($text);
    }

    // "\r\n" must be replaced before the lone "\r", otherwise a DOS return
    // would turn into two line breaks; str_replace applies the pairs in order.
    $normalized = str_replace(array("\r\n", "\r"), "\n", $text);

    $tally = array();
    foreach (explode("\n", $normalized) as $line) {
        foreach (explode(' ', trim($line)) as $word) {
            if ($word === '') {
                continue; // artefact of a blank line or consecutive spaces
            }
            if (isset($tally[$word])) {
                ++$tally[$word];
            } else {
                $tally[$word] = 1;
            }
        }
    }

    return $tally;
}

// get the data from disk; is_readable() first so a missing file yields a
// visible message instead of a PHP warning and a bogus count
$original = is_readable('titles.csv') ? file_get_contents('titles.csv') : false;
if ($original === false) {
    echo '<p>Could not read titles.csv</p>';
    $original = '';
}

$wordcount = countWords($original, $shouldToLower);

echo '<pre>';
// the words come straight from the file, so escape them before they reach the page
echo htmlspecialchars(print_r($wordcount, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
echo '</pre>';
?>
</body></html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment