Skip to content

Instantly share code, notes, and snippets.

@ezhulenev
Created November 11, 2014 02:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ezhulenev/b66ac4b6cfd675195f3f to your computer and use it in GitHub Desktop.
Save ezhulenev/b66ac4b6cfd675195f3f to your computer and use it in GitHub Desktop.
SVM MLLib
/**
 * Minimal Spark MLlib example: trains a linear SVM with SGD on two synthetic
 * clusters of points, prints a handful of predictions and reports the area
 * under the ROC curve measured on a held-out split.
 *
 * The entry point is an explicit `main` method rather than `scala.App`:
 * with `App`, fields are initialised through `DelayedInit`, so values captured
 * by Spark closures may still be uninitialised when the closure runs on an
 * executor. Everything a closure needs is therefore a local value of `main`.
 */
object SVM {

  import org.apache.spark.mllib.classification.SVMWithSGD
  import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
  import org.apache.spark.mllib.linalg.Vectors
  import org.apache.spark.mllib.regression.LabeledPoint
  import org.apache.spark.{SparkContext, SparkConf}
  import scala.util.Random

  private lazy val sparkConf =
    new SparkConf()
      .setMaster("local[2]")
      .setAppName("SparkSVM")

  /** Spark context of the example; created on first use and stopped by `main`. */
  lazy val sc = new SparkContext(sparkConf)

  /** Fixed-seed generator so that the synthetic data set is reproducible. */
  val rnd = new Random(seed = 123L)

  /**
   * Generates one random labeled point whose features are integers scattered
   * around `mean`.
   *
   * @param label     class label stored in the point
   * @param mean      centre of every feature value
   * @param deviation exclusive upper bound of the distance from `mean`; it is
   *                  passed to `Random.nextInt`, which requires a positive bound
   * @param features  number of features in the generated vector
   * @return a point labeled `label` with `features` dense feature values
   */
  def labeledPoint(label: Int, mean: Int, deviation: Int, features: Int = 10): LabeledPoint = {
    // The sign is drawn before the offset, which keeps the sequence of random
    // draws (and therefore the generated data) stable for the fixed seed.
    def feature: Double = {
      val sign = if (rnd.nextBoolean()) 1 else -1
      (mean + sign * rnd.nextInt(deviation)).toDouble
    }

    LabeledPoint(label, Vectors.dense(Array.fill(features)(feature)))
  }

  /** Dimensionality of the synthetic points produced by `label0` and `label1`. */
  val nFeatures = 2

  /** Fresh random point of class 0, centred on 10. */
  def label0 = labeledPoint(0, 10, 5, features = nFeatures)

  /** Fresh random point of class 1, centred on 200. */
  def label1 = labeledPoint(1, 200, 5, features = nFeatures)

  /**
   * Runs the example: generates the data, trains the model, prints sample
   * predictions and the area under the ROC curve.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val data0 = Seq.fill(1000)(label0)
    val data1 = Seq.fill(1000)(label1)

    // Force the lazy context once, outside `try`, so that a failed start-up is
    // not followed by a second creation attempt from the `finally` clause.
    val context = sc
    try {
      val data = context.parallelize(data0 ++ data1)

      // Split data into training (60%) and test (40%).
      val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Run training algorithm to build the model
      val numIterations = 300
      val svm = new SVMWithSGD()
      svm.optimizer
        .setNumIterations(numIterations)
        .setRegParam(0.1)
      val model = svm.run(training)

      // Predict class labels for a few fresh points; the default threshold is
      // still in place here, so `predict` yields labels rather than raw scores.
      Seq.fill(10)(label0).foreach { point =>
        println(s"Label0 ${model.predict(point.features)}")
      }
      Seq.fill(10)(label1).foreach { point =>
        println(s"Label1 ${model.predict(point.features)}")
      }

      // Clear the default threshold.
      model.clearThreshold()

      // Compute raw scores on the test set.
      val scoreAndLabels = test.map { point =>
        val score = model.predict(point.features)
        (score, point.label)
      }

      // Get evaluation metrics.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      val auROC = metrics.areaUnderROC()
      println(s"Area under ROC = $auROC")
    } finally {
      // Release the context even when training or evaluation fails.
      context.stop()
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment