Skip to content

Instantly share code, notes, and snippets.

@powerlim2
Created May 20, 2013 20:37
Show Gist options
  • Save powerlim2/5615314 to your computer and use it in GitHub Desktop.
Save powerlim2/5615314 to your computer and use it in GitHub Desktop.
Sentiment analysis class — reads its word-list text files from within the jar file.
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Hashtable;
import java.util.Objects;
import java.util.regex.Pattern;
/**
 * Lexicon-based sentence sentiment scorer.
 *
 * <p>Takes a sentence and returns an integer code for its sentiment. Every word found in the
 * polarity lexicon contributes +1 (positive list) or -1 (negative list). A word that is not in
 * the lexicon is mapped through the stem dictionary and the mapped form is looked up instead.
 * The sign of the summed score selects the returned code.
 *
 * <p>The no-argument constructor reads six word lists from classpath resources resolved relative
 * to this class (i.e. from within the jar): {@code positive-words.txt},
 * {@code negative-words.txt}, {@code noun.txt}, {@code verb.txt}, {@code adj.txt} and
 * {@code adv.txt}. The polarity files hold one word per line; the other four hold
 * whitespace-separated {@code inflected base} pairs, one pair per line.
 *
 * <p>Created 5/18/13 with IntelliJ IDEA.
 *
 * @author joonhyunglim
 */
public class MapReduceSentimentScore {

    /** Code returned by {@link #analysis(String)} for a sentence with a positive total score. */
    public static final int POSITIVE = 1;

    /** Code returned by {@link #analysis(String)} for a sentence whose total score is zero. */
    public static final int NEUTRAL = 2;

    /** Code returned by {@link #analysis(String)} for a sentence with a negative total score. */
    public static final int NEGATIVE = 3;

    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern NON_WORD = Pattern.compile("\\W");

    /** Polarity lexicon: word to +1 (positive list) or -1 (negative list). */
    Hashtable<String, Integer> hashtable = new Hashtable<String, Integer>();

    /** Stem dictionary: first token of each stem-file line mapped to the second token. */
    Hashtable<String, String> htstem = new Hashtable<String, String>();

    /** Number of entries loaded from each word list. */
    int posnum = 0, negnum = 0, nounnum = 0, verbnum = 0, adjnum = 0, advnum = 0;

    /**
     * Loads the six word lists bundled next to this class on the classpath and prints a one-line
     * summary of how many entries were loaded.
     *
     * <p>An {@link IOException} while reading is handled on a best-effort basis: the stack trace
     * is printed and the instance keeps whatever had been loaded up to that point. All opened
     * streams are closed whether or not loading succeeds.
     *
     * @throws NullPointerException if one of the resources cannot be found
     */
    public MapReduceSentimentScore() {
        try (BufferedReader positive = openResource("positive-words.txt");
             BufferedReader negative = openResource("negative-words.txt");
             BufferedReader nouns = openResource("noun.txt");
             BufferedReader verbs = openResource("verb.txt");
             BufferedReader adjectives = openResource("adj.txt");
             BufferedReader adverbs = openResource("adv.txt")) {
            loadAll(positive, negative, nouns, verbs, adjectives, adverbs);
            // this code below will print out whether it is ready for the analysis.
            System.out.println(posnum+" of positive words and "+negnum+" of negative words, "+nounnum+" nouns, "+verbnum+" verbs, "+adjnum+" adjs, "+advnum+" advs are ready for use.");
        } catch (IOException e) {
            // Best effort: report the failure and continue with the partially loaded tables.
            e.printStackTrace();
        }
    }

    /**
     * Loads the word lists from caller-supplied readers instead of classpath resources.
     *
     * <p>The readers are read to the end but not closed; closing them is the caller's job.
     *
     * @param positive   one positive word per line
     * @param negative   one negative word per line
     * @param nouns      {@code inflected base} pairs for nouns, one pair per line
     * @param verbs      {@code inflected base} pairs for verbs, one pair per line
     * @param adjectives {@code inflected base} pairs for adjectives, one pair per line
     * @param adverbs    {@code inflected base} pairs for adverbs, one pair per line
     * @throws IOException          if reading any of the readers fails
     * @throws NullPointerException if any reader is {@code null}
     */
    public MapReduceSentimentScore(Reader positive, Reader negative, Reader nouns,
                                   Reader verbs, Reader adjectives, Reader adverbs)
            throws IOException {
        loadAll(new BufferedReader(positive), new BufferedReader(negative),
                new BufferedReader(nouns), new BufferedReader(verbs),
                new BufferedReader(adjectives), new BufferedReader(adverbs));
    }

    /** Opens a classpath resource located relative to this class as a UTF-8 text reader. */
    private BufferedReader openResource(String name) {
        InputStream in = Objects.requireNonNull(
                getClass().getResourceAsStream(name), "classpath resource not found: " + name);
        return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    }

    /** Fills both tables from the six word lists and records the per-list entry counts. */
    private void loadAll(BufferedReader positive, BufferedReader negative, BufferedReader nouns,
                         BufferedReader verbs, BufferedReader adjectives, BufferedReader adverbs)
            throws IOException {
        posnum = loadPolarity(positive, 1);
        // Loaded second, so a word present in both lists ends up negative.
        negnum = loadPolarity(negative, -1);
        nounnum = loadStems(nouns);
        verbnum = loadStems(verbs);
        // Adverbs go in before adjectives so that, for a word present in both files, the
        // adjective mapping wins. This keeps the resulting table identical to what earlier
        // versions of this class built, while each count is now attributed to the right file.
        advnum = loadStems(adverbs);
        adjnum = loadStems(adjectives);
    }

    /** Adds every non-blank line of {@code in} to the polarity lexicon; returns how many were added. */
    private int loadPolarity(BufferedReader in, int score) throws IOException {
        int loaded = 0;
        String line;
        while ((line = in.readLine()) != null) {
            String word = line.trim();
            if (word.isEmpty()) {
                // A blank key would match every token that is pure punctuation.
                continue;
            }
            hashtable.put(word, score);
            loaded++;
        }
        return loaded;
    }

    /** Adds every well-formed pair line of {@code in} to the stem dictionary; returns how many were added. */
    private int loadStems(BufferedReader in) throws IOException {
        int loaded = 0;
        String line;
        while ((line = in.readLine()) != null) {
            String[] tokens = WHITESPACE.split(line.trim());
            if (tokens.length < 2) {
                // Blank or single-token line: there is no mapping to record.
                continue;
            }
            htstem.put(tokens[0], tokens[1]);
            loaded++;
        }
        return loaded;
    }

    /**
     * Scores the sentiment of a sentence.
     *
     * <p>The sentence is split on whitespace and every non-word character is stripped from each
     * token. Tokens that become empty are ignored. Matching is case-sensitive.
     *
     * @param sentence the text to score
     * @return {@link #POSITIVE} (1), {@link #NEUTRAL} (2) or {@link #NEGATIVE} (3); or
     *         {@code null} to signal an error when {@code sentence} is {@code null} or empty
     */
    public Integer analysis(String sentence) {
        if (sentence == null || sentence.isEmpty()) {
            return null; // this implies error!
        }
        int total = 0;
        for (String token : WHITESPACE.split(sentence)) {
            // ignore all non-word characters
            String word = NON_WORD.matcher(token).replaceAll("");
            if (!word.isEmpty()) {
                total += polarityOf(word);
            }
        }
        if (total > 0) {
            return POSITIVE;
        } else if (total == 0) {
            return NEUTRAL;
        } else {
            return NEGATIVE;
        }
    }

    /**
     * Returns the polarity of a single word: its lexicon value if present, otherwise the lexicon
     * value of its stem-dictionary mapping, otherwise 0.
     */
    private int polarityOf(String word) {
        Integer direct = hashtable.get(word);
        if (direct != null) {
            return direct;
        }
        String stemmed = htstem.get(word);
        if (stemmed == null) {
            return 0;
        }
        Integer viaStem = hashtable.get(stemmed);
        return viaStem == null ? 0 : viaStem;
    }

    /**
     * Smoke test: scores a few sample sentences with the bundled word lists and prints each
     * sentence followed by its sentiment code.
     */
    public static void main(String[] args) {
        String[] sentences = new String[] {"Benedict is a good student?", "Do you know who Benedict is?", "Joon is a reliable person", "is Joon a bad student?", "Samsung is bad phone.", "fjwefjpoewjfopewjfopewjfopewjofjwep"};
        MapReduceSentimentScore good = new MapReduceSentimentScore();
        for (String sentence : sentences) {
            System.out.println(sentence+" : "+good.analysis(sentence));
        }
    }
}
@powerlim2
Copy link
Author

The jar file corresponding to this class is in my GitHub account.
It is called "MRSentiment.jar".

Cheers,
Joon

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment