Skip to content

Instantly share code, notes, and snippets.

@Lhy121125
Created December 25, 2022 20:13
Show Gist options
  • Save Lhy121125/c8aae351cb6d2c61ae2a6fddf71a92a7 to your computer and use it in GitHub Desktop.
Save Lhy121125/c8aae351cb6d2c61ae2a6fddf71a92a7 to your computer and use it in GitHub Desktop.
AI Algorithm for Sentiment Analysis
/*
* SentAnalysis.java
* Ray Zeng(Tianrui) & Nick Luo(Haiyu)
* All group members were present and contributing during all work on this project.
* We have neither received nor given any unauthorized aid in this assignment.
*/
package hw4;
/*
* Please see submission instructions for what to write here.
*/
import java.io.*;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Scanner;
/**
 * Naive Bayes sentiment classifier.
 *
 * Training files are named {@code <prefix>-<label>-<rest>}; label "5" marks a
 * positive example and any other label is treated as negative. Words are the
 * whitespace-separated tokens of a file, used verbatim (case-sensitive, with
 * punctuation attached), both when training and when classifying.
 */
public class SentAnalysis {

    /** Folder holding the labelled training files. */
    final static File TRAINFOLDER = new File("train-s21");

    // Per-class word counts filled in by train(): word -> number of occurrences
    public static HashMap<String, Double> positive = new HashMap<String, Double>();
    public static HashMap<String, Double> negative = new HashMap<String, Double>();

    // Number of training files labelled positive / negative
    public static double posTotal = 0;
    public static double negTotal = 0;

    // Total number of word tokens seen in all positive examples
    public static double posWords = 0;
    // Total number of word tokens seen in all negative examples
    public static double negWords = 0;

    // Additive-smoothing constant used by classify()
    private static final double LAMBDA = 0.0001;

    /*
     * Trains on TRAINFOLDER, then either evaluates a folder (single argument
     * "evaluate") or classifies lines typed by the user until "quit" or end of input.
     */
    public static void main(String[] args) throws IOException
    {
        ArrayList<String> files = readFiles(TRAINFOLDER);
        train(files);

        //if command line argument is "evaluate", runs evaluation mode
        if (args.length == 1 && args[0].equals("evaluate")) {
            evaluate();
            return;
        }

        //otherwise, runs interactive mode
        @SuppressWarnings("resource") // wraps System.in, which is left open
        Scanner scan = new Scanner(System.in);
        System.out.print("Text to classify>> ");
        // hasNextLine() lets the loop end cleanly when input is exhausted
        while (scan.hasNextLine()) {
            String textToClassify = scan.nextLine();
            if (textToClassify.equals("quit")) {
                break;
            }
            System.out.println("Result: " + classify(textToClassify));
            System.out.print("Text to classify>> ");
        }
    }

    /*
     * Takes as parameter a folder and returns a list of filenames (Strings)
     * in the folder. Returns an empty list when the folder cannot be listed
     * (it does not exist or is not a directory).
     */
    public static ArrayList<String> readFiles(File folder){
        System.out.println("Populating list of files");
        //List to store filenames in folder
        ArrayList<String> filelist = new ArrayList<String>();
        File[] entries = folder.listFiles();
        if (entries == null) {
            return filelist;
        }
        for (File fileEntry : entries) {
            filelist.add(fileEntry.getName());
        }
        return filelist;
    }

    /*
     * Trainer: reads the named files from TRAINFOLDER and adds their word counts
     * to the class-level tables used by classify().
     */
    public static void train(ArrayList<String> files) throws FileNotFoundException
    {
        train(TRAINFOLDER, files);
    }

    /**
     * Trainer: reads the named files from {@code folder} and adds their word
     * counts to the class-level tables. Counts accumulate across calls.
     * Names without a "-"-separated label and sub-directories are skipped.
     *
     * @param folder directory containing the training files
     * @param files  names of the files to read, relative to {@code folder};
     *               when null, every entry of {@code folder} is used
     * @throws FileNotFoundException if {@code folder} is not a directory or a
     *                               listed file cannot be opened
     */
    public static void train(File folder, ArrayList<String> files) throws FileNotFoundException
    {
        if (!folder.isDirectory()) {
            throw new FileNotFoundException("Training folder not found: " + folder.getPath());
        }
        ArrayList<String> names = (files != null) ? files : readFiles(folder);
        for (String filename : names) {
            String label = labelOf(filename);
            File f = new File(folder, filename);
            if (label == null || f.isDirectory()) {
                continue;
            }
            try (Scanner reader = new Scanner(f)) {
                if (label.equals("5")) {
                    //positive files
                    posTotal++;
                    posWords += countWords(reader, positive);
                }
                else {
                    //negative files
                    negTotal++;
                    negWords += countWords(reader, negative);
                }
            }
        }
    }

    /*
     * Returns the label field (second "-"-separated part) of a filename,
     * or null when the name has no such field.
     */
    private static String labelOf(String filename) {
        String[] parts = filename.split("-");
        return parts.length >= 2 ? parts[1] : null;
    }

    /*
     * Adds every remaining token of reader to counts and returns how many
     * tokens were read.
     */
    private static double countWords(Scanner reader, HashMap<String, Double> counts) {
        double seen = 0;
        while (reader.hasNext()) {
            String word = reader.next();
            Double previous = counts.get(word);
            counts.put(word, previous == null ? 1.0 : previous + 1);
            seen++;
        }
        return seen;
    }

    /**
     * Classifier: classifies the input text as positive or negative.
     *
     * Each class score is log2 P(class) plus, for every whitespace-separated
     * token, log2 of the additively smoothed likelihood
     * (count + lambda) / (classTokens + lambda * vocabularySize),
     * where the vocabulary is the set of words seen in either class.
     *
     * @param text text to classify
     * @return "positive" when the positive score is at least the negative
     *         score, otherwise "negative"
     */
    public static String classify(String text)
    {
        // Tokenize on any whitespace, the same way Scanner.next() does in training
        String trimmed = text.trim();
        String[] words = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");

        // Vocabulary size = number of distinct words across both classes
        double vocabulary = positive.size();
        for (String word : negative.keySet()) {
            if (!positive.containsKey(word)) {
                vocabulary++;
            }
        }
        double posDenominator = posWords + LAMBDA * vocabulary;
        double negDenominator = negWords + LAMBDA * vocabulary;

        double pos = log2(posTotal / (posTotal + negTotal)); //log P(positive)
        double neg = log2(negTotal / (posTotal + negTotal)); //log P(negative)
        for (String word : words) {
            pos += logLikelihood(positive, word, posDenominator);
            neg += logLikelihood(negative, word, negDenominator);
        }
        return (pos >= neg) ? "positive" : "negative";
    }

    /*
     * Returns log2 of the smoothed likelihood of word given the class whose
     * counts are supplied; a word absent from counts has count 0.
     */
    private static double logLikelihood(HashMap<String, Double> counts, String word, double denominator) {
        Double count = counts.get(word);
        return log2(((count == null ? 0.0 : count) + LAMBDA) / denominator);
    }

    /**
     * Computes log_2(d)
     * @param d - value
     * @return log_2(d)
     */
    private static double log2(double d){
        return Math.log(d)/Math.log(2);
    }

    /*
     * Classifier: asks for a folder name on standard input, classifies every
     * labelled file in it and prints accuracy and per-class precision.
     * A precision prints as NaN when no file was assigned to that class.
     */
    public static void evaluate() throws FileNotFoundException
    {
        @SuppressWarnings("resource") // wraps System.in, which is left open
        Scanner scan = new Scanner(System.in);
        System.out.print("Enter folder name of files to classify: ");
        String foldername = scan.nextLine();
        File folder = new File(foldername);
        if (!folder.isDirectory()) {
            throw new FileNotFoundException("Folder not found: " + foldername);
        }
        ArrayList<String> filesToClassify = readFiles(folder);

        double numPosCorrect = 0;
        double numNegCorrect = 0;
        double numPosReturned = 0;
        double numNegReturned = 0;
        for (String filename : filesToClassify) {
            String label = labelOf(filename);
            File file = new File(folder, filename);
            //make sure the name is valid before spending time on the file
            if (label == null || file.isDirectory()) {
                continue;
            }
            String result = classify(fileConvert(file));
            //If classified as positive file
            if (result.equals("positive")) {
                if (label.equals("5")) {
                    numPosCorrect++;
                }
                numPosReturned++;
            }
            //If classified as negative file
            else {
                if (label.equals("1")) {
                    numNegCorrect++;
                }
                numNegReturned++;
            }
        }

        if (numPosReturned + numNegReturned == 0) {
            System.out.println("No labelled files found in " + foldername);
            return;
        }

        //calculate the accuracy and precision
        double acc = (numPosCorrect + numNegCorrect)/(numPosReturned + numNegReturned);
        double prePos = numPosCorrect/numPosReturned;
        double preNeg = numNegCorrect/numNegReturned;
        //Result
        System.out.println("\nRESULTS\n");
        System.out.println("Accuracy:" + acc*100 + "%");
        System.out.println("Precision (positive):" + prePos*100 + "%");
        System.out.println("Precision (negative):" + preNeg*100 + "%");
    }

    /*
     * Returns the content of file as one string, with its lines joined by a
     * single space so that words on adjacent lines stay separate tokens.
     */
    public static String fileConvert(File file) throws FileNotFoundException {
        StringBuilder str = new StringBuilder();
        try (Scanner text = new Scanner(file)) {
            while (text.hasNextLine()) {
                if (str.length() > 0) {
                    str.append(' ');
                }
                str.append(text.nextLine());
            }
        }
        return str.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment