krro / Main.scala
Created September 13, 2015 07:49
import org.apache.spark.mllib.regression.{ RidgeRegressionWithSGD, LabeledPoint }
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.mllib.linalg.Vectors
import scala.io.Source

object Main extends App {
  // Local Spark context for the "quotes" example.
  val sparkConfig = new SparkConf().setAppName("quotes").setMaster("local")
  val sparkContext = new SparkContext(sparkConfig)

  // Read the quotes CSV file; each line is one quote record.
  val quotesFileLines = Source.fromFile("...your...path...").getLines.toList
  // Take the sixth comma-separated field (index 5) of each line as the price.
  val prices = quotesFileLines.map { _.split(",").toList(5).toDouble }

  // Percentage change between consecutive prices.
  val growths = prices.drop(1).zip(prices.dropRight(1)).map {
    case (current, previous) => 100.0 * (current - previous) / previous
  }

  // One labeled point per step: label = current growth,
  // features = the 20 growths immediately preceding it.
  val probesNumber = 20
  val labeledPoints = for (i <- probesNumber until growths.size) yield {
    LabeledPoint(growths(i), Vectors.dense(growths.slice(i - probesNumber, i).toArray))
  }
krro / rdd.scala
Created September 13, 2015 07:55
// Distribute the labeled points as an RDD and split it 70/30 into training and test sets.
val labeledPointsRDD = sparkContext.parallelize(labeledPoints)
val Array(trainingData, testData) = labeledPointsRDD.randomSplit(Array(0.7, 0.3))
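
RidgeRegressionWithSGD is imported in Main.scala but never used in the snippets above. A minimal sketch of how the 70/30 split could feed it, assuming Spark MLlib 1.x; the numIterations, stepSize and regParam values are illustrative assumptions, not taken from the gists:

// Sketch only: train a ridge regression model on the training split and
// report mean squared error on the test split.
val numIterations = 100  // assumed value
val stepSize = 0.01      // assumed value
val regParam = 0.1       // assumed value
val model = RidgeRegressionWithSGD.train(trainingData, numIterations, stepSize, regParam)

val squaredErrors = testData.map { point =>
  val prediction = model.predict(point.features)
  math.pow(point.label - prediction, 2)
}
println(s"Test MSE: ${squaredErrors.mean()}")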