# Build the pipeline stages in order. `indexer` starts as the list of
# per-column StringIndexers and is then extended in place, so by the time it
# is handed to Pipeline it holds every stage, not just the indexers.

# One StringIndexer per categorical column, writing to "<column>Index".
# handleInvalid="keep" is set so invalid values do not abort the transform
# (see the Spark StringIndexer docs for the exact bucketing behavior).
indexer = [
    StringIndexer(
        inputCol=column_name,
        outputCol=column_name + "Index",
        handleInvalid="keep",
    )
    for column_name in column_to_index
]

# Assemble indexed and numeric columns into a single "features" vector.
# NOTE(review): this relies on column_to_index producing PclassIndex,
# SexIndex and EmbarkedIndex -- confirm against where it is defined.
assembler = VectorAssembler(
    inputCols=[
        "PclassIndex",
        "SexIndex",
        "Age",
        "Fare",
        "WithFamily",
        "EmbarkedIndex",
    ],
    outputCol="features",
)

# Index the target column "Survived" into "label".
labelindexer = StringIndexer(inputCol="Survived", outputCol="label")

# Keep only the two columns passed on to the estimator stage.
sql_transformer = SQLTransformer(statement="SELECT features, label FROM __THIS__")

indexer.extend([assembler, labelindexer, sql_transformer])

# The estimator goes last, after all feature/label preparation stages.
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
indexer.append(lr)

pipeline = Pipeline(stages=indexer)
model = pipeline.fit(trainingData)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment