Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import com.salesforce.op._
import com.salesforce.op.readers._
import com.salesforce.op.features._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.classification._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
implicit val spark = SparkSession.builder.config(new SparkConf()).getOrCreate()
import spark.implicits._
// Read Titanic data as a DataFrame
val passengersData = DataReaders.Simple.csvCase[Passenger](path = pathToData).readDataset().toDF()
// Extract response and predictor features
val (survived, predictors) = FeatureBuilder.fromDataFrame[RealNN](passengersData, response = "survived")
// Automated feature engineering
val featureVector = predictors.transmogrify()
// Automated feature validation and selection
val checkedFeatures = survived.sanityCheck(featureVector, removeBadFeatures = true)
// Automated model selection
val (pred, raw, prob) = BinaryClassificationModelSelector().setInput(survived, checkedFeatures).getOutput()
// Setting up a TransmogrifAI workflow and training the model
val model = new OpWorkflow().setInputDataset(passengersData).setResultFeatures(pred).train()
println("Model summary:\n" + model.summaryPretty())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment