Created
April 2, 2014 15:47
-
-
Save Balaami/9936840 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
/**
 * Prints word-frequency statistics for {@code essays-first-series.txt},
 * ignoring every word listed in {@code common-words.txt}.
 *
 * <p>A "word" is a maximal run of ASCII letters ({@code a-z}, {@code A-Z}).
 * Words are compared case-insensitively: both the common-word list and the
 * essay words are lower-cased before they are stored or looked up.
 *
 * <p>Both input files are read from the current working directory.
 */
public class Ralph_Essay {

    /** File listing the words that are excluded from the statistics. */
    private static final String COMMON_WORDS_FILE = "common-words.txt";

    /** File whose words are counted. */
    private static final String ESSAY_FILE = "essays-first-series.txt";

    /** Regex matching everything that separates two words (any run of non-letters). */
    private static final String NON_LETTERS = "[^a-zA-Z]+";

    /** Lower-cased essay word -> number of occurrences (common words excluded). */
    private static final Map<String, Integer> wordCounts = new HashMap<String, Integer>();

    /** Lower-cased words read from {@link #COMMON_WORDS_FILE}. */
    private static final Set<String> commonWords = new HashSet<String>();

    /** Words whose length equals the longest length found; filled by {@link #printReport}. */
    private static final List<String> longestWords = new ArrayList<String>();

    /**
     * Loads the common words, counts the essay words and prints the report.
     *
     * @param args ignored
     * @throws FileNotFoundException if either input file cannot be opened
     */
    public static void main(String args[]) throws FileNotFoundException {
        loadCommonWords();
        int[] docData = scanFile();
        printReport(docData[0], docData[1]);
    }

    /**
     * Orders the counted words from most to least frequent, collects the
     * longest words and prints the whole report to standard output.
     *
     * @param countWords total number of counted word occurrences
     * @param longWord   length of the longest counted word
     */
    private static void printReport(int countWords, int longWord) {
        // Order by descending count, then alphabetically. The tie-break is
        // essential: a TreeSet treats elements whose comparator returns 0 as
        // duplicates, so comparing by count alone drops every word that
        // shares its count with an earlier one.
        TreeSet<String> ordered = new TreeSet<String>(new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                int byCount = wordCounts.get(o2).compareTo(wordCounts.get(o1));
                return byCount != 0 ? byCount : o1.compareTo(o2);
            }
        });
        ordered.addAll(wordCounts.keySet());

        // Rebuild the list from scratch so a repeated call does not duplicate entries.
        longestWords.clear();
        for (String word : ordered) {
            if (word.length() == longWord) {
                longestWords.add(word);
            }
        }

        System.out.println("total word count after filtering -\nunique words: " + ordered.size()
                + " Total number : " + countWords);
        System.out.println("\t**************");
        if (ordered.isEmpty()) {
            // first() would throw NoSuchElementException on an empty set.
            System.out.println("highest occurring word: (none)");
        } else {
            String top = ordered.first();
            System.out.println("highest occurring word: " + top + " " + wordCounts.get(top));
        }
        System.out.println("\t**************");
        System.out.println("longest word(s) and its / their length:");
        for (String item : longestWords) {
            System.out.print("\t" + item + "\t");
        }
        if (!longestWords.isEmpty()) {
            // The heading promises the length, so print it after the words.
            System.out.print("length: " + longWord);
        }
        System.out.println("");
        System.out.println("\t**************");
        System.out.println("most occurring to least occurring:");
        for (String s : ordered) {
            System.out.println("\t" + s + "\t");
        }
    }

    /**
     * Reads {@link #COMMON_WORDS_FILE} and adds every word in it, lower-cased,
     * to {@link #commonWords}.
     *
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void loadCommonWords() throws FileNotFoundException {
        try (Scanner scanner = new Scanner(new File(COMMON_WORDS_FILE))) {
            while (scanner.hasNextLine()) {
                for (String token : scanner.nextLine().split(NON_LETTERS)) {
                    // split() yields an empty first token when the line starts with a separator.
                    if (!token.isEmpty()) {
                        // Lower-case so the lookup in scanFile(), which is also
                        // lower-cased, matches entries such as "I".
                        commonWords.add(token.toLowerCase(Locale.ROOT));
                    }
                }
            }
        }
    }

    /**
     * Reads {@link #ESSAY_FILE} and records in {@link #wordCounts} how often
     * each non-common word occurs.
     *
     * @return a two-element array: {@code [0]} the total number of counted
     *         word occurrences, {@code [1]} the length of the longest counted
     *         word (0 if no word was counted)
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static int[] scanFile() throws FileNotFoundException {
        int totalWords = 0;
        int longestLen = 0;
        try (Scanner scanner = new Scanner(new File(ESSAY_FILE))) {
            while (scanner.hasNextLine()) {
                for (String token : scanner.nextLine().split(NON_LETTERS)) {
                    if (token.isEmpty()) {
                        continue;
                    }
                    String word = token.toLowerCase(Locale.ROOT);
                    if (commonWords.contains(word)) {
                        continue;
                    }
                    Integer seen = wordCounts.get(word);
                    wordCounts.put(word, seen == null ? 1 : seen + 1);
                    // Count every occurrence, not only the first sighting of a word.
                    totalWords++;
                    longestLen = Math.max(longestLen, word.length());
                }
            }
        }
        return new int[] {totalWords, longestLen};
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment