Last active
August 29, 2015 13:58
-
-
Save lizkaraffa/9986876 to your computer and use it in GitHub Desktop.
Forum Challenge: Reading and Displaying Data (Part 1); Language Used: PHP; Author: Liz Karaffa
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Forum Challenge: Reading and Displaying Data (Part 1).
 *
 * Downloads an essay, strips punctuation and digits, removes a fixed list of
 * common English words, and prints: how many words remain, the most frequent
 * word, the longest word(s), and every remaining word from most to least
 * frequent.
 */

/**
 * Escapes a value for safe inclusion in HTML text.
 *
 * @param mixed $value Value to print; it is cast to string first.
 * @return string HTML-escaped text.
 */
function essay_html($value)
{
    // ENT_SUBSTITUTE keeps the output non-empty even if the bytes are not valid UTF-8.
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Lowercases the essay and strips line breaks, punctuation and digits.
 *
 * @param string $raw Essay text as downloaded.
 * @return string Lowercased text in which words are separated by one or more spaces.
 */
function essay_normalize($raw)
{
    // Line breaks, tabs and the "--" dash all separate words, so they become
    // spaces. Deleting "--" outright would glue "truth--that" into "truththat".
    $separators = ["\r", "\n", "\t", '--'];
    $text = strtolower(str_replace($separators, ' ', $raw));

    // Single hyphens are kept on purpose so compounds such as "self-reliance" survive.
    $discard = [
        '.', ',', ':', ';', '?', '!', "'", '"', '{', '}', '(', ')', '[', ']', '_', '*',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ];

    return str_replace($discard, '', $text);
}

/**
 * Splits normalized text into words and removes the stop words.
 *
 * @param string   $normalized Output of essay_normalize().
 * @param string[] $stopwords  Lowercase words to leave out.
 * @return string[] Remaining words in reading order, indexed from 0.
 */
function essay_significant_words($normalized, array $stopwords)
{
    $tokens = [];
    foreach (explode(' ', $normalized) as $token) {
        // A hyphen at either edge is a leftover dash, not part of the word.
        $token = trim($token, '-');
        if ($token !== '') {
            $tokens[] = $token;
        }
    }

    return array_values(array_diff($tokens, $stopwords));
}

/**
 * Finds the word with the highest count.
 *
 * @param array $occurrences Map of word => count, e.g. from array_count_values().
 * @return string|null The most frequent word (the earliest entry wins a tie),
 *                     or null when the map is empty.
 */
function essay_most_common_word(array $occurrences)
{
    if (count($occurrences) === 0) {
        return null;
    }

    return (string) array_search(max($occurrences), $occurrences, true);
}

/**
 * Collects every distinct word that has the greatest length.
 *
 * Length is measured with strlen(), i.e. in bytes.
 *
 * @param string[] $words Words to inspect; duplicates are allowed.
 * @return string[] Distinct longest words in first-seen order; empty for empty input.
 */
function essay_longest_words(array $words)
{
    $longest = [];
    $maxLength = -1;

    foreach (array_unique($words) as $word) {
        $length = strlen($word);
        if ($length > $maxLength) {
            $maxLength = $length;
            $longest = [$word];
        } elseif ($length === $maxLength) {
            $longest[] = $word;
        }
    }

    return $longest;
}

/**
 * Builds the HTML sentence that reports the longest word(s).
 *
 * Works for any number of words instead of assuming there are exactly two.
 *
 * @param string[] $words Words of equal length, as returned by essay_longest_words().
 * @return string HTML fragment ending in "<br />", or '' when $words is empty.
 */
function essay_describe_longest_words(array $words)
{
    if (count($words) === 0) {
        return '';
    }

    $length = strlen($words[0]);
    $escaped = array_map('essay_html', array_values($words));

    // The sentence-ending period goes inside the quotes of the final word.
    $last = '"' . array_pop($escaped) . '."';

    if (count($escaped) === 0) {
        return 'The longest word is ' . $length . ' characters in length and it is ' . $last . ' <br />';
    }

    $leading = '"' . implode('", "', $escaped) . '"';

    return 'The longest words are ' . $length . ' characters in length and they are '
        . $leading . ' and ' . $last . ' <br />';
}

// Gets essay
$url = 'http://treehouse-forum-contests.s3.amazonaws.com/visualizing-information/part-one/essays-first-series.txt';
// Give up after 10 seconds rather than waiting for the default socket timeout.
$context = stream_context_create(['http' => ['timeout' => 10]]);
$text = file_get_contents($url, false, $context);

// Words to filter out
$list = "the, be, to, of, and, a, in, that, have, i, it, for, not, on, with, he, as, you, do, at, this, but, his, by, from, they, we, say, her, she, or, an, will, my, one, all, would, there, their, what, so, up, out, if, about, who, get, which, go, me, when, make, can, like, time, no, just, him, know, take, people, into, year, your, good, some, could, them, see, other, than, then, now, look, only, come, its, over, think, also, back, after, use, two, how, our, work, first, well, way, even, new, want, because, any, these, give, day, most, us, is, was, are, were, says, said, has, had";
// Turns list of words to filter out into an array
$words = explode(', ', $list);

// Essay words with the common words filtered out (empty when the download failed)
$wordcount = ($text === false) ? [] : essay_significant_words(essay_normalize($text), $words);
$total = count($wordcount);

// Counts the occurrence of each word
$occurences = array_count_values($wordcount);
// Must run before arsort(): ties are resolved by first appearance in the essay.
$most_common = essay_most_common_word($occurences);

$long_words = essay_longest_words($wordcount);
$longest_length = count($long_words) > 0 ? strlen($long_words[0]) : 0;

// Orders the list of words from highest occurrence to least
arsort($occurences);
$order = array_keys($occurences);

if ($text === false) {
    echo '<p> The essay could not be downloaded from ' . essay_html($url) . '. </p>';
} elseif ($total === 0) {
    echo '<p> No words are left after the most common words are filtered out. </p>';
} else {
    echo "There are " . $total . " words after the most common words are filtered out. <br />";
    echo 'The most common word in the essay is: "' . essay_html($most_common) . '." <br />';
    echo essay_describe_longest_words($long_words);
?>
<p> The following are the most common to least common words in the essay: </p>
<ul>
<?php
    foreach ($order as $word) { ?>
<li> <?php echo essay_html($word); ?> </li>
<?php } ?>
</ul>
<?php
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment