Skip to content

Instantly share code, notes, and snippets.

@vdubedout
Last active August 6, 2016 17:04
Show Gist options
  • Save vdubedout/f29b6411ebde8cca971f50ab55ecb42c to your computer and use it in GitHub Desktop.
Save vdubedout/f29b6411ebde8cca971f50ab55ecb42c to your computer and use it in GitHub Desktop.
Data Structures Course, Week 1 Assignment. No matcher pattern. https://www.coursera.org/learn/data-structures-optimizing-performance/home/week/1 #Course
package document;
import java.util.List;
/**
 * Straightforward {@link Document} implementation that recomputes every
 * statistic from the raw text each time it is asked for one.
 *
 * @author UC San Diego Intermediate Programming MOOC team
 */
public class BasicDocument extends Document
{
    /**
     * Builds a BasicDocument around the supplied text.
     *
     * @param text The full text of the Document.
     */
    public BasicDocument(String text)
    {
        super(text);
    }

    /**
     * Counts the words in the document.
     * A "word" is a contiguous run of alphabetic characters (a-z or A-Z).
     * Digits are never part of a word, and the text is assumed not to
     * contain tokens that mix digits and letters.
     *
     * The whole text is scanned on every call.
     *
     * @return The number of words in the document.
     */
    @Override
    public int getNumWords()
    {
        List<String> words = getWords();
        return words.size();
    }

    /**
     * Extracts the words of the document, in order of appearance.
     *
     * @return a list holding every maximal run of ASCII letters in the text
     */
    public List<String> getWords()
    {
        final String wordPattern = "[A-Za-z]+";
        return getTokens(wordPattern);
    }

    /**
     * Counts the sentences in the document.
     * A sentence is a contiguous run of characters that contains none of the
     * end-of-sentence marks (. ! or ?); the final run counts even when it is
     * not followed by such a mark.
     *
     * The whole text is scanned on every call.
     *
     * @return The number of sentences in the document.
     */
    @Override
    public int getNumSentences()
    {
        List<String> sentences = getTokens("[^.!?]+");
        return sentences.size();
    }

    /**
     * Counts the syllables in the document by summing the syllable count of
     * each word returned by {@link #getWords()}. The per-word rule is the one
     * implemented by {@code countSyllables} in {@link Document}.
     *
     * The whole text is scanned on every call.
     *
     * @return The number of syllables in the document.
     */
    @Override
    public int getNumSyllables()
    {
        List<String> words = getWords();
        int total = 0;
        for (int i = 0; i < words.size(); i++) {
            total += countSyllables(words.get(i));
        }
        return total;
    }

    /**
     * Ad-hoc test driver. Every call to testCase passes a document followed
     * by the expected number of syllables, words and sentences, in that order.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        Document mixedPunctuation = new BasicDocument("This is a test.  How many???  "
                + "Senteeeeeeeeeences are here... there should be 5!  Right?");
        testCase(mixedPunctuation, 16, 13, 5);

        Document empty = new BasicDocument("");
        testCase(empty, 0, 0, 0);

        Document commas = new BasicDocument("sentence, with, lots, of, commas.!  "
                + "(And some poaren)).  The output is: 7.5.");
        testCase(commas, 15, 11, 4);

        Document noFinalMark = new BasicDocument("many???  Senteeeeeeeeeences are");
        testCase(noFinalMark, 6, 3, 2);

        Document selfDescribing = new BasicDocument("Here is a series of test sentences. Your program should "
                + "find 3 sentences, 33 words, and 49 syllables. Not every word will have "
                + "the correct amount of syllables (example, for example), "
                + "but most of them will.");
        testCase(selfDescribing, 49, 33, 3);

        testCase(new BasicDocument("Segue"), 2, 1, 1);
        testCase(new BasicDocument("Sentence"), 2, 1, 1);
        testCase(new BasicDocument("Sentences?!"), 3, 1, 1);

        Document latin = new BasicDocument("Lorem ipsum dolor sit amet, qui ex choro quodsi moderatius, nam dolores explicari forensibus ad.");
        testCase(latin, 32, 15, 1);
    }
}
package document;
/**
* A class that represents a text document
* @author UC San Diego Intermediate Programming MOOC team
*/
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Base class for a text document that can report its word, sentence and
 * syllable counts and derive a Flesch readability score from them.
 * Subclasses decide how the three counts are computed; this class supplies
 * regex tokenisation, per-word syllable counting and a small test helper.
 */
public abstract class Document {

    /** Every character that counts as a vowel for syllable purposes (y included). */
    private static final String VOWELS = "aeiouyAEIOUY";

    private final String text;

    /**
     * Create a new document from the given text.
     * Because this class is abstract, this is used only from subclasses.
     *
     * @param text The text of the document.
     */
    protected Document(String text)
    {
        this.text = text;
    }

    /**
     * Returns the tokens that match the regex pattern from the document
     * text string.
     *
     * @param pattern A regular expression string specifying the
     *   token pattern desired
     * @return A List of tokens from the document text that match the regex
     *   pattern
     */
    protected List<String> getTokens(String pattern)
    {
        return getTokens(text, pattern);
    }

    /**
     * Returns the tokens that match the regex pattern from the text provided.
     *
     * @param textToAnalyse text to match
     * @param pattern Regular expression string specifying the token pattern desired
     * @return A List of the successive, non-overlapping matches of the pattern
     *   in the text, in order of appearance
     */
    private List<String> getTokens(String textToAnalyse, String pattern)
    {
        List<String> tokens = new ArrayList<String>();
        Matcher m = Pattern.compile(pattern).matcher(textToAnalyse);
        while (m.find()) {
            tokens.add(m.group());
        }
        return tokens;
    }

    /**
     * Returns the number of syllables in a word.
     *
     * Rule: each contiguous sequence of one or more vowels is a syllable,
     * with the following exception: a lone "e" at the end of a word
     * is not considered a syllable unless the word has no other syllables.
     * y is considered a vowel. Upper and lower case letters are treated alike,
     * including the trailing "e". Any character that is not a vowel simply
     * separates vowel runs.
     *
     * The word is scanned with a plain character loop; no Pattern or Matcher
     * is created here because this method is called once per word.
     *
     * @param word The word to count the syllables in
     * @return The number of syllables in the given word; 0 for an empty word
     *   or a word without vowels
     */
    protected int countSyllables(String word)
    {
        int syllables = 0;
        boolean insideVowelRun = false;
        for (int i = 0; i < word.length(); i++) {
            boolean vowel = isVowel(word.charAt(i));
            // A syllable starts wherever a vowel follows a non-vowel (or the word start).
            if (vowel && !insideVowelRun) {
                syllables++;
            }
            insideVowelRun = vowel;
        }
        // The silent final "e" is only dropped when another syllable remains.
        if (syllables > 1 && endsWithLoneE(word)) {
            syllables--;
        }
        return syllables;
    }

    /** Tells whether the character is one of a, e, i, o, u, y in either case. */
    private static boolean isVowel(char c)
    {
        return VOWELS.indexOf(c) >= 0;
    }

    /**
     * Tells whether the word ends with an "e" (either case) that forms a vowel
     * run on its own, i.e. is not preceded by another vowel as in "ue" or "ee".
     */
    private static boolean endsWithLoneE(String word)
    {
        int last = word.length() - 1;
        if (last < 0) {
            return false;
        }
        char finalChar = word.charAt(last);
        if (finalChar != 'e' && finalChar != 'E') {
            return false;
        }
        return last == 0 || !isVowel(word.charAt(last - 1));
    }

    /**
     * A method for testing. Prints the document text, compares the three
     * counts reported by the document with the expected values and prints
     * one line per mismatch followed by "passed." or "FAILED.".
     *
     * @param doc The Document object to test
     * @param syllables The expected number of syllables
     * @param words The expected number of words
     * @param sentences The expected number of sentences
     * @return true if the test case passed. False otherwise.
     */
    public static boolean testCase(Document doc, int syllables, int words, int sentences)
    {
        System.out.println("Testing text: ");
        System.out.print(doc.getText() + "\n....");
        boolean passed = true;
        int syllFound = doc.getNumSyllables();
        int wordsFound = doc.getNumWords();
        int sentFound = doc.getNumSentences();
        if (syllFound != syllables) {
            System.out.println("\nIncorrect number of syllables.  Found " + syllFound
                    + ", expected " + syllables);
            passed = false;
        }
        if (wordsFound != words) {
            System.out.println("\nIncorrect number of words.  Found " + wordsFound
                    + ", expected " + words);
            passed = false;
        }
        if (sentFound != sentences) {
            System.out.println("\nIncorrect number of sentences.  Found " + sentFound
                    + ", expected " + sentences);
            passed = false;
        }
        if (passed) {
            System.out.println("passed.\n");
        }
        else {
            System.out.println("FAILED.\n");
        }
        return passed;
    }

    /** Return the number of words in this document */
    public abstract int getNumWords();

    /** Return the number of sentences in this document */
    public abstract int getNumSentences();

    /** Return the number of syllables in this document */
    public abstract int getNumSyllables();

    /** Return the entire text of this document */
    public String getText()
    {
        return this.text;
    }

    /**
     * Return the Flesch readability score of this document:
     * 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words).
     *
     * Each count is fetched once and the ratios are computed in double
     * precision. When the word or sentence count is zero the ratios are
     * floating-point divisions by zero, so the result is NaN or infinite
     * rather than an exception.
     *
     * @return the Flesch score computed from the three counts
     */
    public double getFleschScore()
    {
        double words = getNumWords();
        double sentences = getNumSentences();
        double syllables = getNumSyllables();
        return 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment