Skip to content

Instantly share code, notes, and snippets.

@Bekbolatov
Last active August 29, 2015 14:20
Show Gist options
  • Save Bekbolatov/49b0e548c580aa94c4e3 to your computer and use it in GitHub Desktop.
Save Bekbolatov/49b0e548c580aa94c4e3 to your computer and use it in GitHub Desktop.
Multiclass SVM for digit recognition
// using https://github.com/Bekbolatov/spark/commit/463d73323d5f08669d5ae85dc9791b036637c966
import org.apache.spark.mllib.classification.SVMMultiClassWithSGD
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import breeze.linalg.DenseVector
/**
 * Loads a comma-separated digits file as a cached RDD of labeled points.
 *
 * Blank lines are skipped. Every remaining line is split on commas and each field is
 * parsed as a Double; the first `numFeatures` values form the dense feature vector and
 * the last value on the line is used as the label.
 *
 * @param path        file location handed to `sc.textFile`
 * @param numFeatures number of leading columns used as features; the default of 16
 *                    matches the `0 to 15` slice this script originally hard-coded
 * @return a cached RDD with one `LabeledPoint` per non-blank input line
 */
def loadDigits(path: String, numFeatures: Int = 16): org.apache.spark.rdd.RDD[LabeledPoint] = {
  sc.textFile(path)
    // A blank (e.g. trailing) line would otherwise fail in toDouble.
    .filter(_.trim.nonEmpty)
    .map { line =>
      val values = line.split(",").map(_.trim.toDouble)
      // Require at least one column beyond the features so the label is never a feature value.
      require(
        values.length > numFeatures,
        s"expected at least ${numFeatures + 1} comma-separated values but got ${values.length}: $line")
      LabeledPoint(values.last, Vectors.dense(values.take(numFeatures)))
    }
    // Both data sets are traversed more than once below, so keep the parsed rows in memory.
    .cache()
}

val digits_train = loadDigits("/data/pendigits.tra")
val digits_test = loadDigits("/data/pendigits.tes")
// Fit the multiclass SVM on the training set.
// NOTE(review): the second argument is presumably the number of SGD iterations, mirroring
// MLlib's SVMWithSGD.train(input, numIterations) -- confirm against the forked API.
val numIterations = 100
val model = SVMMultiClassWithSGD.train(digits_train, numIterations)
// Pair the model's prediction with the true label for every test example.
val predictionAndLabel = digits_test.map(p => (model.predict(p.features), p.label))

// Count the matching pairs and the total number of pairs in a single pass over the
// predictions, instead of running one job for the matches and another to count the test set.
val (correct, total) = predictionAndLabel.aggregate((0L, 0L))(
  { case ((hits, seen), (predicted, actual)) =>
    (if (predicted == actual) hits + 1 else hits, seen + 1)
  },
  { case ((hitsA, seenA), (hitsB, seenB)) => (hitsA + hitsB, seenA + seenB) }
)

// Fraction of test examples whose prediction equals the label (NaN for an empty test set).
val accuracy = correct.toDouble / total
// For each test example, pair the value returned by model.predict with the example's label.
// NOTE(review): this is the same computation as predictionAndLabel above -- confirm whether
// a raw score was intended here instead of the predicted class.
val scoreAndLabels = digits_test.map(example => (model.predict(example.features), example.label))

// Pull the first five pairs back to the driver as a quick spot check.
scoreAndLabels.take(5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment