Created
April 23, 2019 06:01
-
-
Save vivek081166/441356314635d37cd86c545ee9c51105 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// End-to-end TransmogrifAI example: read the Titanic passengers, derive features
// automatically, select a binary classifier and print a summary of the trained model.
//
// NOTE(review): `Passenger` (the case class describing one CSV row) and `pathToData`
// (the location of the CSV file) are not defined in this snippet; they must be in
// scope before it is run.
import com.salesforce.op._
import com.salesforce.op.readers._
import com.salesforce.op.features._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.classification._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// The session is declared implicit so it can be picked up by calls below that take an
// implicit SparkSession; the explicit type keeps implicit resolution predictable.
implicit val spark: SparkSession = SparkSession.builder.config(new SparkConf()).getOrCreate()
import spark.implicits._

// Read Titanic data as a DataFrame
val passengersData = DataReaders.Simple.csvCase[Passenger](path = pathToData).readDataset().toDF()

// Extract response and predictor features: the "survived" column becomes the response,
// the second element of the tuple holds the predictors
val (survived, predictors) = FeatureBuilder.fromDataFrame[RealNN](passengersData, response = "survived")

// Automated feature engineering: combine all predictors into one feature vector
val featureVector = predictors.transmogrify()

// Automated feature validation and selection; removeBadFeatures = true asks the sanity
// checker to drop the features it flags instead of only reporting them
val checkedFeatures = survived.sanityCheck(featureVector, removeBadFeatures = true)

// Automated model selection; only `pred` is used as the workflow result below
val (pred, raw, prob) = BinaryClassificationModelSelector().setInput(survived, checkedFeatures).getOutput()

// Setting up a TransmogrifAI workflow and training the model
val model = new OpWorkflow().setInputDataset(passengersData).setResultFeatures(pred).train()

println(s"Model summary:\n${model.summaryPretty()}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment