Created
December 25, 2022 20:13
-
-
Save Lhy121125/c8aae351cb6d2c61ae2a6fddf71a92a7 to your computer and use it in GitHub Desktop.
AI Algorithm for Sentiment Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* SentAnalysis.java | |
* Ray Zeng(Tianrui) & Nick Luo(Haiyu) | |
* All group members were present and contributing during all work on this project. | |
* We have neither received nor given any unauthorized aid in this assignment. | |
*/ | |
package hw4; | |
/* | |
* Please see submission instructions for what to write here. | |
*/ | |
import java.io.*; | |
import java.nio.file.Files; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.Scanner; | |
/**
 * Naive Bayes sentiment classifier for short review texts.
 *
 * <p>Training reads every labelled file in {@link #TRAINFOLDER} and stores
 * per-class word counts in static fields. Classification scores a text under
 * both classes using log-probabilities with additive smoothing and returns
 * {@code "positive"} or {@code "negative"}.
 *
 * <p>File names are split on {@code '-'}; the second field is the label.
 * {@code "5"} marks a positive review. During training every other label is
 * treated as negative; during evaluation only {@code "1"} counts as a correct
 * negative.
 *
 * <p>All state is static and mutable, so this class is not thread-safe.
 */
public class SentAnalysis {

    /** Folder holding the labelled training reviews. */
    final static File TRAINFOLDER = new File("train-s21");

    /** Additive smoothing constant added to every word count. */
    private static final double LAMBDA = 0.0001;

    /** Word -> number of occurrences across all positive training files. */
    public static HashMap<String, Double> positive = new HashMap<String, Double>();
    /** Word -> number of occurrences across all negative training files. */
    public static HashMap<String, Double> negative = new HashMap<String, Double>();

    /** Number of positive training files. */
    public static double posTotal = 0;
    /** Number of negative training files. */
    public static double negTotal = 0;

    /** Total number of word occurrences in all positive training files. */
    public static double posWords = 0;
    /** Total number of word occurrences in all negative training files. */
    public static double negWords = 0;

    /**
     * Trains on {@link #TRAINFOLDER}, then either evaluates a folder of
     * labelled files (single argument {@code "evaluate"}) or classifies lines
     * typed on standard input until {@code "quit"} or end of input.
     *
     * @param args command-line arguments
     * @throws IOException if the training or evaluation data cannot be read
     */
    public static void main(String[] args) throws IOException {
        ArrayList<String> files = readFiles(TRAINFOLDER);
        train(files);

        if (args.length == 1 && args[0].equals("evaluate")) {
            evaluate();
        } else {
            // Wraps System.in, which must stay open, hence no close().
            @SuppressWarnings("resource")
            Scanner scan = new Scanner(System.in);
            System.out.print("Text to classify>> ");
            // hasNextLine() lets a closed or piped stdin end the loop cleanly
            // instead of throwing NoSuchElementException.
            while (scan.hasNextLine()) {
                String textToClassify = scan.nextLine();
                if (textToClassify.equals("quit")) {
                    break;
                }
                System.out.println("Result: " + classify(textToClassify));
                System.out.print("Text to classify>> ");
            }
        }
    }

    /**
     * Lists the names of the entries directly inside a folder.
     *
     * @param folder folder to list
     * @return the entry names; empty when {@code folder} is {@code null},
     *         does not exist, or is not a directory
     */
    public static ArrayList<String> readFiles(File folder) {
        System.out.println("Populating list of files");
        ArrayList<String> filelist = new ArrayList<String>();
        // listFiles() returns null (not an empty array) for a missing folder.
        File[] entries = (folder == null) ? null : folder.listFiles();
        if (entries == null) {
            return filelist;
        }
        for (File entry : entries) {
            filelist.add(entry.getName());
        }
        return filelist;
    }

    /**
     * Reads every labelled file in {@link #TRAINFOLDER} and accumulates the
     * document and word counts used by {@link #classify(String)}.
     *
     * <p>Counts are added to the existing static state, so calling this method
     * twice counts the training data twice. Entries that are not regular files
     * or whose name has no label field are skipped.
     *
     * @param files kept for signature compatibility; not consulted, because the
     *              method lists {@link #TRAINFOLDER} itself
     * @throws FileNotFoundException if {@link #TRAINFOLDER} is missing or a
     *                               training file cannot be opened
     */
    public static void train(ArrayList<String> files) throws FileNotFoundException {
        File[] entries = TRAINFOLDER.listFiles();
        if (entries == null) {
            throw new FileNotFoundException(
                    "Training folder not found: " + TRAINFOLDER.getPath());
        }
        for (File entry : entries) {
            if (!entry.isFile()) {
                continue;
            }
            String[] nameParts = entry.getName().split("-");
            if (nameParts.length < 2) {
                continue; // no label field, so not a labelled review
            }
            if (nameParts[1].equals("5")) {
                posTotal++;
                posWords += countWords(entry, positive);
            } else {
                negTotal++;
                negWords += countWords(entry, negative);
            }
        }
    }

    /**
     * Adds every whitespace-delimited token of a file to a count table.
     *
     * @param file   file to read
     * @param counts table to update in place
     * @return number of tokens read from the file
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static double countWords(File file, HashMap<String, Double> counts)
            throws FileNotFoundException {
        double tokens = 0;
        try (Scanner reader = new Scanner(file)) {
            // Loop over the whole file; reading a single token would leave
            // almost all of the training text uncounted.
            while (reader.hasNext()) {
                String word = normalize(reader.next());
                Double previous = counts.get(word);
                counts.put(word, previous == null ? 1.0 : previous + 1.0);
                tokens++;
            }
        }
        return tokens;
    }

    /**
     * Canonical form of a token, shared by training and classification so
     * that lookups are case-insensitive. Punctuation is left untouched.
     */
    private static String normalize(String token) {
        return token.toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Number of distinct words seen in either class during training.
     * Iterates the smaller table only.
     */
    private static double vocabularySize() {
        HashMap<String, Double> smaller = positive.size() <= negative.size() ? positive : negative;
        HashMap<String, Double> larger = (smaller == positive) ? negative : positive;
        int distinct = larger.size();
        for (String word : smaller.keySet()) {
            if (!larger.containsKey(word)) {
                distinct++;
            }
        }
        return distinct;
    }

    /**
     * Classifies a text as positive or negative.
     *
     * <p>For each class c the score is
     * {@code log2 P(c) + sum over words of log2((count(w, c) + LAMBDA) / (words(c) + LAMBDA * V))},
     * where {@code words(c)} is the total number of training words in the class
     * and {@code V} is the number of distinct training words. Ties go to
     * positive.
     *
     * @param text text to classify; tokens are split on any whitespace and
     *             lower-cased; {@code null} is treated as empty
     * @return {@code "positive"} or {@code "negative"}; {@code "negative"} when
     *         no training documents have been counted
     */
    public static String classify(String text) {
        double totalDocs = posTotal + negTotal;
        if (totalDocs == 0) {
            // Untrained: the priors are undefined. "negative" is what the
            // NaN comparison produced before this guard existed.
            return "negative";
        }

        // max(.., 1) keeps the denominators non-zero if every training file was empty.
        double vocabulary = Math.max(vocabularySize(), 1.0);
        double posDenominator = posWords + LAMBDA * vocabulary;
        double negDenominator = negWords + LAMBDA * vocabulary;

        // Start from the class priors; a class with no documents scores -Infinity.
        double pos = log2(posTotal / totalDocs);
        double neg = log2(negTotal / totalDocs);

        if (text != null) {
            for (String token : text.trim().split("\\s+")) {
                if (token.isEmpty()) {
                    continue;
                }
                String word = normalize(token);
                pos += log2((countOf(positive, word) + LAMBDA) / posDenominator);
                neg += log2((countOf(negative, word) + LAMBDA) / negDenominator);
            }
        }
        return (pos >= neg) ? "positive" : "negative";
    }

    /** Count stored for a word, or 0 when the word was never seen. */
    private static double countOf(HashMap<String, Double> counts, String word) {
        Double stored = counts.get(word);
        return stored == null ? 0.0 : stored;
    }

    /**
     * Computes log_2(d)
     * @param d - value
     * @return log_2(d)
     */
    private static double log2(double d) {
        return Math.log(d) / Math.log(2);
    }

    /**
     * Prompts on standard input for a folder, classifies every labelled file
     * in it, and prints accuracy and per-class precision.
     *
     * <p>Entries that are not regular files or whose name has no label field
     * are ignored. A percentage prints as {@code NaN} when its denominator is
     * zero (for example, no file was classified as positive). Standard input
     * is closed before returning.
     *
     * @throws FileNotFoundException if the folder does not exist or a file in
     *                               it cannot be opened
     */
    public static void evaluate() throws FileNotFoundException {
        @SuppressWarnings("resource")
        Scanner scan = new Scanner(System.in);
        System.out.print("Enter folder name of files to classify: ");
        String foldername = scan.hasNextLine() ? scan.nextLine() : "";
        File folder = new File(foldername);
        if (!folder.isDirectory()) {
            throw new FileNotFoundException("Not a folder: " + foldername);
        }
        ArrayList<String> filesToClassify = readFiles(folder);

        double numPosCorrect = 0;
        double numNegCorrect = 0;
        double numPosReturned = 0;
        double numNegReturned = 0;

        for (String filename : filesToClassify) {
            File file = new File(folder, filename);
            String[] nameParts = filename.split("-");
            // Check the name before reading, so unlabelled entries cost nothing.
            if (!file.isFile() || nameParts.length < 2) {
                continue;
            }
            String label = nameParts[1];
            String result = classify(fileConvert(file));
            if (result.equals("positive")) {
                if (label.equals("5")) {
                    numPosCorrect++;
                }
                numPosReturned++;
            } else {
                if (label.equals("1")) {
                    numNegCorrect++;
                }
                numNegReturned++;
            }
        }

        double acc = (numPosCorrect + numNegCorrect) / (numPosReturned + numNegReturned);
        double prePos = numPosCorrect / numPosReturned;
        double preNeg = numNegCorrect / numNegReturned;

        System.out.println("\nRESULTS\n");
        System.out.println("Accuracy:" + acc * 100 + "%");
        System.out.println("Precision (positive):" + prePos * 100 + "%");
        System.out.println("Precision (negative):" + preNeg * 100 + "%");
        scan.close();
    }

    /**
     * Reads a whole file into one string.
     *
     * @param file file to read
     * @return the file's lines joined by single spaces, so that words on
     *         adjacent lines stay separate tokens
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static String fileConvert(File file) throws FileNotFoundException {
        StringBuilder joined = new StringBuilder();
        try (Scanner text = new Scanner(file)) {
            while (text.hasNextLine()) {
                if (joined.length() > 0) {
                    joined.append(' ');
                }
                joined.append(text.nextLine());
            }
        }
        return joined.toString();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment