Skip to content

Instantly share code, notes, and snippets.

@Balaami
Created April 2, 2014 15:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Balaami/9936840 to your computer and use it in GitHub Desktop.
Save Balaami/9936840 to your computer and use it in GitHub Desktop.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Scanner;
import java.util.TreeSet;
/**
 * Word-frequency report for an essay.
 *
 * <p>Reads a list of common words from {@code common-words.txt}, scans
 * {@code essays-first-series.txt}, drops every word that appears in the
 * common-word list, and prints: the number of unique and total remaining
 * words, the most frequent word, the longest word(s) with their length, and
 * all remaining words ordered from most to least frequent.
 *
 * <p>A "word" is a maximal run of ASCII letters. Filtering against the
 * common-word list is case-insensitive; counting is case-sensitive (the word
 * is stored exactly as it appears in the essay).
 */
public class Ralph_Essay {
    /** File holding the words that must be excluded from the statistics. */
    private static final String COMMON_WORDS_FILE = "common-words.txt";
    /** File holding the text to analyse. */
    private static final String ESSAY_FILE = "essays-first-series.txt";
    /** Regex matching everything that separates two words (any non-letter run). */
    private static final String WORD_SEPARATOR = "[^a-zA-Z]+";

    /** Occurrence count of every non-common word found in the essay. */
    private static final HashMap<String, Integer> AllWord = new HashMap<String, Integer>();
    /** Lower-cased common words that are filtered out of the essay. */
    private static final HashSet<String> CommWord = new HashSet<>();
    /** The longest word(s) found in the essay, filled by {@link #OrderData}. */
    private static final ArrayList<String> LonWord = new ArrayList<>();

    /**
     * Runs the whole report: load the common words, scan the essay, print.
     *
     * @param args unused
     * @throws FileNotFoundException if either input file is missing
     */
    public static void main(String args[]) throws FileNotFoundException {
        Ralph_Essay.Common(); // CommWord now holds every word of common-words.txt
        int[] DocData = Ralph_Essay.ScanFile();
        Ralph_Essay.OrderData(DocData[0], DocData[1]);
    }

    /**
     * Orders the collected words by frequency, finds the longest word(s) and
     * prints the full report to standard output.
     *
     * @param CountWords total number of (non-common) word occurrences
     * @param LongWord   length of the longest (non-common) word
     */
    private static void OrderData(int CountWords, int LongWord) {
        TreeSet<String> ordered = new TreeSet<>(new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Highest count first. The alphabetical tie-break is essential:
                // a TreeSet treats compare()==0 as "same element", so without it
                // every word sharing a count with an earlier word would be dropped.
                int byCount = AllWord.get(o2).compareTo(AllWord.get(o1));
                return byCount != 0 ? byCount : o1.compareTo(o2);
            }
        });
        ordered.addAll(AllWord.keySet());

        // Rebuild from scratch so repeated calls do not accumulate duplicates;
        // walking the sorted set keeps the output order deterministic.
        LonWord.clear();
        for (String word : ordered) {
            if (word.length() == LongWord) {
                LonWord.add(word);
            }
        }

        // print all
        System.out.println("total word count after filtering -\nunique words: " + ordered.size()
                + " Total number : " + CountWords);
        System.out.println("\t**************");
        if (ordered.isEmpty()) {
            // first() would throw NoSuchElementException on an empty set.
            System.out.println("highest occurring word: (none)");
        } else {
            String top = ordered.first();
            System.out.println("highest occurring word: " + top + " " + AllWord.get(top));
        }
        System.out.println("\t**************");
        System.out.println("longest word(s) and its / their length:");
        for (String item : LonWord) {
            System.out.print("\t" + item + "\t");
        }
        // The header promises the length, so end the line with it.
        System.out.println(LonWord.isEmpty() ? "" : "length: " + LongWord);
        System.out.println("\t**************");
        System.out.println("most occurring to least occurring:");
        for (String s : ordered) {
            System.out.println("\t" + s + "\t");
        }
    }

    /**
     * Loads {@code common-words.txt} into {@link #CommWord}, lower-cased so
     * that the case-insensitive lookup done while scanning the essay matches.
     *
     * @throws FileNotFoundException if {@code common-words.txt} does not exist
     */
    private static void Common() throws FileNotFoundException {
        CommWord.clear();
        try (Scanner scanComm = new Scanner(new File(COMMON_WORDS_FILE))) {
            while (scanComm.hasNextLine()) {
                for (String word : scanComm.nextLine().split(WORD_SEPARATOR)) {
                    // split() yields an empty first token when a line starts with a separator
                    if (!word.isEmpty()) {
                        CommWord.add(word.toLowerCase(java.util.Locale.ROOT));
                    }
                }
            }
        }
    }

    /**
     * Scans {@code essays-first-series.txt} and fills {@link #AllWord} with the
     * occurrence count of every word that is not a common word.
     *
     * @return {@code [total number of counted word occurrences, length of the longest word]}
     * @throws FileNotFoundException if {@code essays-first-series.txt} does not exist
     */
    private static int[] ScanFile() throws FileNotFoundException {
        AllWord.clear();
        int CountAll = 0;
        int LongestLen = 0;
        try (Scanner scanDoc = new Scanner(new File(ESSAY_FILE))) {
            while (scanDoc.hasNextLine()) {
                for (String word : scanDoc.nextLine().split(WORD_SEPARATOR)) {
                    // Locale.ROOT keeps the lookup independent of the default locale
                    // (e.g. Turkish maps "I" to a dotless i).
                    if (word.isEmpty() || CommWord.contains(word.toLowerCase(java.util.Locale.ROOT))) {
                        continue;
                    }
                    Integer seen = AllWord.get(word);
                    AllWord.put(word, seen == null ? 1 : seen + 1);
                    CountAll++; // every occurrence counts towards the total, not only the first
                    if (word.length() > LongestLen) {
                        LongestLen = word.length();
                    }
                }
            }
        }
        return new int[] {CountAll, LongestLen};
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment