Skip to content

Instantly share code, notes, and snippets.

@atamborrino
Last active July 6, 2017 07:09
Show Gist options
  • Save atamborrino/a12d7835f7080edf56edc2ed65576fab to your computer and use it in GitHub Desktop.
Save atamborrino/a12d7835f7080edf56edc2ed65576fab to your computer and use it in GitHub Desktop.
In-memory use of MLeap, a serialization format for Spark ML (PoC style, no error handling)
import java.net.URI
import java.nio.file.{FileSystems, Files}
import java.util.UUID

import scala.collection.JavaConverters._

import com.google.common.jimfs.{Configuration, Jimfs}
import ml.combust.bundle.BundleFile
import ml.combust.mleap.runtime.{LeapFrame, LocalDataset, Row}
import ml.combust.mleap.runtime.MleapSupport._
import ml.combust.mleap.runtime.types.{DoubleType, StructField, StructType, TensorType}
import ml.combust.mleap.tensor.Tensor
import resource._
// Loads a zipped MLeap bundle that is only available as bytes, without touching the
// real disk: the bytes are written into an in-memory (Jimfs) file system, opened as
// a zip ("jar:") bundle, deserialized, and then used to score a single row.
//
// PoC style: every `.get` below rethrows the underlying failure instead of handling it.

// In-memory file system that stands in for a temp directory on disk.
// NOTE(review): `fs` is never closed here — presumably it is meant to be reused for
// further bundle loads; call `fs.close()` once it is no longer needed.
val fs = Jimfs.newFileSystem(Configuration.unix())
val tmpFolder = fs.getPath("/tmp")
// createDirectories (unlike createDirectory) does not fail if the folder already exists.
Files.createDirectories(tmpFolder)

// Placeholder: supply the zipped bundle bytes here (ex: get as blob from Cassandra).
// `???` keeps the script compilable and fails loudly until real bytes are provided.
val zippedBundle: Array[Byte] = ???

// A random file name avoids collisions between bundles written to the same folder.
val uUID = UUID.randomUUID().toString
val path = tmpFolder.resolve(s"$uUID.zip")
// The "jar:" prefix makes the URI address the zip file as a file system of its own.
val zipUri = URI.create("jar:" + path.toUri)

val bundle =
  try {
    Files.write(path, zippedBundle)
    // `managed` closes the BundleFile as soon as the bundle has been loaded.
    managed(BundleFile(zipUri)).acquireAndGet { bf =>
      bf.loadMleapBundle().get
    }
  } finally {
    // The BundleFile is already closed at this point, so the temporary zip can be
    // removed whether loading succeeded or failed. deleteIfExists avoids masking the
    // original error when the write itself failed.
    Files.deleteIfExists(path)
  }

// Input schema: a single "features" column holding a tensor of doubles.
val schema = StructType(
  StructField("features", TensorType(DoubleType()))
).get

// One-row dataset with a dense 3-element feature vector.
val testData = LocalDataset(Row(Tensor.denseVector(Array(2.0, 3.0, 1.0))))
val frame = LeapFrame(schema, testData)

// Run the loaded pipeline and read the "prediction" column of the first row.
val result = bundle.root.transform(frame).get
val prediction = result.select("prediction").get.dataset(0).getDouble(0)
println(s"Prediction: $prediction")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment