Skip to content

Instantly share code, notes, and snippets.

@tomtung
Created February 2, 2013 22:35
Show Gist options
  • Save tomtung/4699555 to your computer and use it in GitHub Desktop.
Save tomtung/4699555 to your computer and use it in GitHub Desktop.
This is an example script translated from wekaexamples.core.CreateInstances that generates a weka.core.Instances object with different attribute types and prints it out in ARFF format.
import java.util
import weka.core.{DenseInstance, Attribute, Instances}
import scala.collection.convert.wrapAll._
// 1. set up attributes
// Header of the dataset: one attribute of each kind (numeric, nominal, string,
// date, relational), collected into a java.util.ArrayList.
// Java collections are built explicitly with java.util.Arrays.asList, so this
// block does not depend on implicit Scala-to-Java collection conversions.
val atts = {
  // Labels "<prefix>1" .. "<prefix>5" as a Java list, used for the nominal attributes.
  def labels(prefix: String): util.List[String] =
    util.Arrays.asList((1 to 5).map(i => s"$prefix$i"): _*)

  // numeric
  val att1 = new Attribute("att1")
  // nominal
  val att2 = new Attribute("att2", labels("val"))
  // string: a typed null value list selects the list-taking constructor overload
  // without supplying any nominal labels.
  val noLabels: util.ArrayList[String] = null
  val att3 = new Attribute("att3", noLabels)
  // date
  val att4 = new Attribute("att4", "yyyy-MM-dd")
  // relational
  val att5 = {
    // numeric
    val att5_1 = new Attribute("att5.1")
    // nominal
    val att5_2 = new Attribute("att5.2", labels("val5."))
    // empty nested dataset whose header describes the relational values
    val dataRel = {
      val attsRel = new util.ArrayList[Attribute](util.Arrays.asList(att5_1, att5_2))
      new Instances("att5", attsRel, 0)
    }
    new Attribute("att5", dataRel, 0)
  }
  new util.ArrayList[Attribute](util.Arrays.asList(att1, att2, att3, att4, att5))
}
// 2. create Instances object
// Dataset named "MyRelation" over the attributes in `atts`; instances are added afterwards.
// NOTE(review): the trailing 0 is presumably the initial capacity — confirm against the Weka docs.
val data: Instances = new Instances("MyRelation", atts, 0)
// 3. fill the data
// first instance
// One Double per attribute of `data`. Int results (nominal/string/relation
// indices) are converted with an explicit .toDouble, and the nested rows are
// built as Array[Double], so nothing depends on the compiler widening a mixed
// Double/Int array literal.
val vals1 = {
  val vals = Array.ofDim[Double](data.numAttributes())
  // numeric
  vals(0) = math.Pi
  // nominal
  vals(1) = data.attribute(1).indexOfValue("val3").toDouble
  // string
  vals(2) = data.attribute(2).addStringValue("This is a string!").toDouble
  // date
  vals(3) = data.attribute(3).parseDate("2001-11-09")
  // relational
  vals(4) = {
    val relation = data.attribute(4).relation()
    val nominalRel = relation.attribute(1)
    // nested dataset with the same header as the relational attribute
    val dataRel = new Instances(relation, 0)
    dataRel.add(
      new DenseInstance(1.0, Array[Double](
        math.Pi + 1,
        nominalRel.indexOfValue("val5.3").toDouble)))
    dataRel.add(
      new DenseInstance(1.0, Array[Double](
        math.Pi + 2,
        nominalRel.indexOfValue("val5.2").toDouble)))
    // the value returned by addRelation is what gets stored in the outer row
    data.attribute(4).addRelation(dataRel).toDouble
  }
  vals
}
// add
data.add(new DenseInstance(1.0, vals1))
// second instance
// One Double per attribute of `data`. Int results (nominal/string/relation
// indices) are converted with an explicit .toDouble, and the nested rows are
// built as Array[Double], so nothing depends on the compiler widening a mixed
// Double/Int array literal.
val vals2 = {
  val vals = Array.ofDim[Double](data.numAttributes())
  // numeric
  vals(0) = math.E
  // nominal
  vals(1) = data.attribute(1).indexOfValue("val1").toDouble
  // string
  vals(2) = data.attribute(2).addStringValue("And another one!").toDouble
  // date
  vals(3) = data.attribute(3).parseDate("2000-12-01")
  // relational
  vals(4) = {
    val relation = data.attribute(4).relation()
    val nominalRel = relation.attribute(1)
    // nested dataset with the same header as the relational attribute
    val dataRel = new Instances(relation, 0)
    dataRel.add(
      new DenseInstance(1.0, Array[Double](
        math.E + 1,
        nominalRel.indexOfValue("val5.4").toDouble)))
    dataRel.add(
      new DenseInstance(1.0, Array[Double](
        math.E + 2,
        nominalRel.indexOfValue("val5.1").toDouble)))
    // the value returned by addRelation is what gets stored in the outer row
    data.attribute(4).addRelation(dataRel).toDouble
  }
  vals
}
// add
data.add(new DenseInstance(1.0, vals2))
// 4. output data
// Write the dataset's string representation to standard output.
println(data.toString)
@tomtung
Copy link
Author

tomtung commented Feb 2, 2013

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment