Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
weka_dataset_creation.java
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.Instance;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.Attribute;
import weka.core.DenseInstance;
import java.io.File;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
/**
 * Example of building a Weka {@link Instances} data set programmatically from a
 * plain-text file of labelled messages.
 *
 * <p>Each line of the training file is expected to hold a class label
 * ({@code spam} or {@code ham}), followed by whitespace, followed by the message text.
 */
public class Example {

    /** Training file used by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_TRAINING_FILE = "data/train.txt";

    /**
     * Reads the whole content of a text file into a string, decoding it with the
     * platform default charset (the charset {@link FileReader} uses).
     *
     * @param filename path of the file to read, for example {@code foo.txt}
     * @return the complete file content; an empty string for an empty file, never {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public String readFile(String filename) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader exactly once, whether or not reading succeeds.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            char[] buffer = new char[8192];
            int count;
            // A single read() call may deliver fewer characters than requested, and the file
            // length in bytes is not the number of decoded characters, so read until
            // end-of-stream and append only what was actually read.
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
        }
        return content.toString();
    }

    /**
     * Builds a two-attribute data set (nominal class label + free text) from the
     * training file and prints it to standard output.
     *
     * <p>Rows that cannot be parsed, or whose label is not a declared class value,
     * are reported as {@code invalid row} and skipped. If the file cannot be read,
     * the stack trace is printed and whatever was loaded so far is still printed.
     *
     * @param args optional; {@code args[0]} overrides the default training file
     *             path {@code data/train.txt}
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String trainingFile = args.length > 0 ? args[0] : DEFAULT_TRAINING_FILE;

        // Passing a null value list makes Weka create a string (free-text) attribute.
        Attribute textAttribute = new Attribute("text", (List<String>) null);

        // Nominal class attribute together with its permitted values.
        List<String> classLabels = new ArrayList<>();
        classLabels.add("spam");
        classLabels.add("ham");
        Attribute classAttribute = new Attribute("label", classLabels);

        // Attribute order defines the column order: label first, text second.
        ArrayList<Attribute> attributes = new ArrayList<>();
        attributes.add(classAttribute);
        attributes.add(textAttribute);

        // Empty relation named "Rel" with an initial capacity of 10 rows.
        Instances trainingSet = new Instances("Rel", attributes, 10);
        trainingSet.setClassIndex(0);

        try (BufferedReader br = new BufferedReader(new FileReader(trainingFile))) {
            for (String line; (line = br.readLine()) != null; ) {
                addRow(trainingSet, classAttribute, textAttribute, classLabels, line);
            }
        } catch (IOException e) {
            // Best effort: report the failure and still print the rows loaded so far.
            e.printStackTrace();
        }

        System.out.println(trainingSet);
        // Possible next step (not implemented here): train and evaluate a classifier
        // on trainingSet.
    }

    /**
     * Parses one {@code <label><whitespace><text>} line and appends it to the data set.
     *
     * <p>Prints {@code invalid row} and adds nothing when the line has no text part,
     * when the label or text is empty, or when the label is not one of {@code classLabels}.
     *
     * @param dataset        data set that receives the new row
     * @param classAttribute nominal attribute holding the label
     * @param textAttribute  string attribute holding the message text
     * @param classLabels    labels declared for {@code classAttribute}
     * @param line           raw line read from the training file
     */
    private static void addRow(Instances dataset, Attribute classAttribute, Attribute textAttribute,
            List<String> classLabels, String line) {
        // Limit 2: split off the label only; the remainder (including inner whitespace) is the text.
        String[] parts = line.split("\\s+", 2);

        // Validate up front instead of relying on ArrayIndexOutOfBoundsException, and reject
        // unknown labels here because setValue() throws for undefined nominal values.
        boolean valid = parts.length == 2
                && !parts[0].isEmpty()
                && !parts[1].isEmpty()
                && classLabels.contains(parts[0]);
        if (!valid) {
            System.out.println("invalid row");
            return;
        }

        Instance row = new DenseInstance(2);
        System.out.println(String.format("class: %s\n", parts[0]));
        row.setValue(classAttribute, parts[0]);
        row.setValue(textAttribute, parts[1]);
        dataset.add(row);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment