Created
November 6, 2017 04:27
-
-
Save annikakouhia/1cf29c0e1aa6ad2e315b7e8297b7c339 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* This was a lab I completed with my friend Morgan for our CS10 course this past quarter at Dartmouth.
 * Brown University mapped out a huge document made up of tons of sentences from books, magazines, newspapers, etc.,
 * and then made a second document where each entry was the part of speech for the corresponding entry in the first
 * document.
 * For this lab, we were given the first and second document from Brown, and then used the Viterbi algorithm to basically
 * "teach" our program how to take in another document (we were given test documents with solutions for class) or user input
 * and output the corresponding parts of speech.
 * To deduce what each word (from the user input or from a document you put in) was in terms of parts of speech, the computer
 * relied on two things. First was the probability that the word was a certain POS based on what POS the word before it was,
 * and second was the probability that the word was a certain POS based on what the word actually was (like the probability that
 * the word 'trains' is a noun versus a verb). We trained the computer to know these probabilities by reading in and working with
 * the patterns in the documents from Brown, and then used Viterbi to calculate all the possible "paths" of POSs, before finding the
 * most likely one based on our probabilities, and backtracking to print out the correct path of parts of speech at the end.
 * I loved this lab because I thought it was so cool that we were able to use probabilities and algorithms to do something as
 * human as identifying what the part of speech is for a certain word given its context. Upon turning in this lab, we were able to
 * run through the Brown training sentences and calculate the part of speech for a given word with 97% accuracy.
 */
//Morgan and Annika's PS5
//3.3.17
//POS = Part of Speech
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.math.*;
import java.util.*;
/**
 * A hidden-Markov-model part-of-speech (POS) tagger. Transition and emission
 * probabilities are counted from a pair of parallel training files (sentences
 * and tags), converted to log-probabilities, and then the Viterbi algorithm
 * recovers the most likely tag sequence for each input sentence.
 */
public class PS5 {
    //Log-space score used when a word has never been observed with a given tag
    //(a large negative number, i.e. a very small probability).
    static final double UNSEEN_PENALTY = -100.0;

    //Map from current POS to a map of each possible next POS and the log-probability of that transition.
    //"#" is the artificial sentence-start state.
    static Map<String, Map<String, Double>> POStoNextandFreq = new HashMap<String, Map<String, Double>>();
    //Map from POS to a map of each word observed with that POS and its log-probability.
    static Map<String, Map<String, Double>> POStoWordandFreq = new HashMap<String, Map<String, Double>>();
    //Every tag read from the training-tags file, in order.
    static ArrayList<String> POS = new ArrayList<String>();
    //Every word read from the training-sentences file, in order (lower-cased).
    static ArrayList<String> totalWords = new ArrayList<String>();
    //Best tag sequence found for the most recently tagged sentence.
    static List<String> bestPath = new ArrayList<String>();
    //Each training sentence as its own list of words, so we can tag sentence by sentence.
    static List<ArrayList<String>> sampleSentence = new ArrayList<ArrayList<String>>();
    //Concatenation of every bestPath we produced (our predicted tags).
    static ArrayList<String> ourAnswers = new ArrayList<String>();
    //The gold-standard tags from the training-tags file, used to measure accuracy.
    static ArrayList<String> rightAnswers = new ArrayList<String>();

    public static void main(String[] args) {
        //Build the emission model (this also reads both training files).
        createMapWords();
        //Build the transition model.
        createMapNext();

        //OPTION 1: RUN THIS (comment out option 2)
        //Tag every training sentence and report accuracy against the gold tags.
        for (int i = 0; i < sampleSentence.size(); i++) {
            viterbi(sampleSentence.get(i));
        }
        System.out.println(corrections(rightAnswers, ourAnswers));

        //OPTION 2: RUN THIS (comment out option 1)
        //Interactive tagging of sentences typed by the user.
        //personalInput();
    }

    /**
     * Reads sentences from standard input and prints the tag sequence for each.
     * Typing "-1" on a line by itself quits.
     */
    public static void personalInput() {
        Scanner scan = new Scanner(System.in);
        while (scan.hasNextLine()) {
            String input = scan.nextLine();
            //BUG FIX: the original used input == "-1", which compares object
            //identity and never matches text read from a Scanner; equals()
            //compares the characters, so the sentinel actually works.
            if (input.equals("-1")) {
                break;
            }
            //Split the line into words and hand them to Viterbi.
            ArrayList<String> sentence = new ArrayList<String>(Arrays.asList(input.split(" ")));
            viterbi(sentence);
            System.out.println(bestPath);
        }
    }

    /**
     * Scores our predicted tags against the answer key.
     *
     * @param answerKey the gold-standard tags
     * @param test      our predicted tags, parallel to answerKey
     * @return a human-readable count of correct and incorrect tags
     */
    public static String corrections(List<String> answerKey, List<String> test) {
        int same = 0;
        int diff = 0;
        for (int i = 0; i < answerKey.size(); i++) {
            if (answerKey.get(i).equals(test.get(i))) {
                same++;
            } else {
                diff++;
            }
        }
        return "You got " + same + " correct and " + diff + " incorrect";
    }

    /**
     * Runs the Viterbi algorithm on one sentence: stores the most likely tag
     * sequence in {@code bestPath} and appends it to {@code ourAnswers}.
     * All scores are log-probabilities, so path scores are added, not multiplied.
     *
     * @param sentence the words of one sentence, in order
     */
    public static void viterbi(ArrayList<String> sentence) {
        bestPath = new ArrayList<String>();
        //backtrace.get(i).get(tag) = the predecessor tag at position i-1 on the
        //best path that ends in 'tag' at position i.
        Map<Integer, HashMap<String, String>> backtrace = new HashMap<Integer, HashMap<String, String>>();
        //Start in the artificial sentence-start state "#" with score 0 (log 1).
        Set<String> currStates = new HashSet<String>();
        currStates.add("#");
        Map<String, Double> currScores = new HashMap<String, Double>();
        currScores.put("#", 0.0);

        //Advance one observation (word) at a time.
        for (int i = 0; i < sentence.size(); i++) {
            backtrace.put(i, new HashMap<String, String>());
            Set<String> nextStates = new HashSet<String>();
            Map<String, Double> nextScores = new HashMap<String, Double>();
            for (String currState : currStates) {
                //Try every transition currState -> nextState seen in training.
                for (String nextState : POStoNextandFreq.get(currState).keySet()) {
                    nextStates.add(nextState);
                    //score = path so far + transition score + observation score
                    double nextScore = currScores.get(currState)
                            + POStoNextandFreq.get(currState).get(nextState);
                    Map<String, Double> wordScores = POStoWordandFreq.get(nextState);
                    if (wordScores.containsKey(sentence.get(i))) {
                        nextScore += wordScores.get(sentence.get(i));
                    } else {
                        //Word never seen with this tag: apply a flat penalty.
                        nextScore += UNSEEN_PENALTY;
                    }
                    //Keep only the best-scoring way to reach nextState at i.
                    if (!nextScores.containsKey(nextState) || nextScore > nextScores.get(nextState)) {
                        nextScores.put(nextState, nextScore);
                        backtrace.get(i).put(nextState, currState);
                    }
                }
            }
            currStates = nextStates;
            currScores = nextScores;
        }

        //Find the best-scoring final state.
        double highest = Double.NEGATIVE_INFINITY;
        String highestPOS = "";
        for (String s : currScores.keySet()) {
            if (currScores.get(s) > highest) {
                highest = currScores.get(s);
                highestPOS = s;
            }
        }
        bestPath.add(highestPOS);
        //Walk the backtrace from the end of the sentence to recover the path.
        for (int k = sentence.size() - 1; k > 0; k--) {
            highestPOS = backtrace.get(k).get(highestPOS);
            bestPath.add(0, highestPOS);
        }
        ourAnswers.addAll(bestPath);
    }

    /**
     * Builds {@code POStoNextandFreq} from the tag sequence in {@code POS}:
     * first counts tag-to-tag transitions, then converts every count to a
     * log-probability so Viterbi can sum scores without numeric underflow.
     */
    public static void createMapNext() {
        POStoNextandFreq.put("#", new HashMap<String, Double>());
        for (int i = 0; i < POS.size(); i++) {
            String currentPOS = POS.get(i);
            //NOTE(review): as in the original, every tag ever seen is recorded
            //as reachable from "#" with count 1 (a uniform start distribution),
            //not its true sentence-initial frequency.
            if (!POStoNextandFreq.get("#").containsKey(currentPOS)) {
                POStoNextandFreq.get("#").put(currentPOS, 1.0);
            }
            //Count the transition to the following tag, if there is one.
            if (i + 1 < POS.size()) {
                String nextPOS = POS.get(i + 1);
                Map<String, Double> nexts = POStoNextandFreq.get(currentPOS);
                if (nexts == null) {
                    nexts = new HashMap<String, Double>();
                    POStoNextandFreq.put(currentPOS, nexts);
                }
                Double count = nexts.get(nextPOS);
                nexts.put(nextPOS, count == null ? 1.0 : count + 1.0);
            }
        }
        //Convert raw counts to log(count / total) per source tag.
        for (String pos : POStoNextandFreq.keySet()) {
            Map<String, Double> nexts = POStoNextandFreq.get(pos);
            double total = 0.0;
            for (double count : nexts.values()) {
                total += count;
            }
            for (String w : nexts.keySet()) {
                nexts.put(w, Math.log(nexts.get(w) / total));
            }
        }
    }

    /**
     * Builds {@code POStoWordandFreq} by pairing each tag {@code POS.get(i)}
     * with the word {@code totalWords.get(i)}, then converts counts to
     * log-probabilities.
     */
    public static void createMapWords() {
        //Fill POS (and rightAnswers) from the tags file.
        createPOS();
        //Fill totalWords (and sampleSentence) from the sentences file.
        createAllWords();
        for (int i = 0; i < POS.size(); i++) {
            //Guard against mismatched file lengths. BUG FIX: the original
            //tested i+1 < totalWords.size() while indexing totalWords.get(i),
            //which silently dropped the final word/tag pair of the training
            //data (and its else-branch indexed with no bounds check at all).
            if (i < totalWords.size()) {
                String currentPOS = POS.get(i);
                String word = totalWords.get(i);
                Map<String, Double> words = POStoWordandFreq.get(currentPOS);
                if (words == null) {
                    words = new HashMap<String, Double>();
                    POStoWordandFreq.put(currentPOS, words);
                }
                Double count = words.get(word);
                words.put(word, count == null ? 1.0 : count + 1.0);
            }
        }
        //Convert raw counts to log(count / total) per tag.
        for (String pos : POStoWordandFreq.keySet()) {
            Map<String, Double> words = POStoWordandFreq.get(pos);
            double total = 0.0;
            for (double count : words.values()) {
                total += count;
            }
            for (String w : words.keySet()) {
                words.put(w, Math.log(words.get(w) / total));
            }
        }
    }

    /**
     * Reads the training-sentences file, filling {@code totalWords} with every
     * word (lower-cased so lookups are case-insensitive) and
     * {@code sampleSentence} with one word-list per line.
     */
    public static void createAllWords() {
        //try-with-resources closes the reader even if readLine throws.
        try (BufferedReader read = new BufferedReader(
                new FileReader("inputs/simple-train-sentences.txt"))) {
            String line = read.readLine(); //each line is one whole sentence
            while (line != null) {
                ArrayList<String> curr = new ArrayList<String>();
                for (String w : line.split(" ")) {
                    String lower = w.toLowerCase();
                    curr.add(lower);
                    totalWords.add(lower);
                }
                sampleSentence.add(curr);
                line = read.readLine();
            }
        } catch (IOException e) {
            System.out.print("Can't open file");
        }
    }

    /**
     * Reads the training-tags file, filling {@code POS} and
     * {@code rightAnswers} with every tag in order.
     */
    public static void createPOS() {
        try (BufferedReader read = new BufferedReader(
                new FileReader("inputs/simple-train-tags.txt"))) {
            String line = read.readLine();
            while (line != null) {
                for (String w : line.split(" ")) {
                    POS.add(w);
                    rightAnswers.add(w);
                }
                line = read.readLine();
            }
        } catch (IOException e) {
            System.out.print("Can't open file");
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment