Skip to content

Instantly share code, notes, and snippets.

View colbyford's full-sized avatar
🧬

Colby T. Ford colbyford

🧬
View GitHub Profile
@colbyford
colbyford / SparkML_LogisticRegression_Classification.py
Last active September 23, 2022 16:38
SparkML Logistic Regression Classification Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Logistic Regression Classification Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics
@colbyford
colbyford / SparkML_DecisionTree_Classification.py
Last active September 23, 2022 16:40
SparkML Decision Tree Classification Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Decision Tree Classification Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics
@colbyford
colbyford / SparkML_RandomForest_Classification.py
Last active September 21, 2022 15:59
SparkML Random Forest Classification Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Random Forest Classification Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics
@colbyford
colbyford / SparkML_DataPrep_BinaryClassification.py
Last active September 23, 2022 16:41
SparkML Data Preparation Steps for Binary Classification Models
########################################
## Title: Spark MLlib Classification Data Prep Script
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml import Pipeline
from pyspark.ml.feature import OneHotEncoder, OneHotEncoderEstimator, StringIndexer, VectorAssembler
## Name held for the label; the code that consumes it is not visible in this preview.
## NOTE(review): "dependentvar" looks like a placeholder -- presumably replace it with the real label column name; confirm.
label = "dependentvar"
@colbyford
colbyford / SparkML_NaiveBayes_Classification.py
Last active September 23, 2022 16:41
SparkML Naïve Bayes Classification Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Naïve Bayes Classification Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.classification import NaiveBayes
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.evaluation import BinaryClassificationMetrics
@colbyford
colbyford / SparkML_Scorer.py
Last active September 23, 2022 16:41
Score data using a transformation pipeline and a trained SparkML model.
########################################
## Title: Spark MLlib Model Scorer
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.tuning import CrossValidatorModel
from pyspark.ml import PipelineModel
from pyspark.sql.functions import col, round
from pyspark.sql.types import IntegerType, FloatType
@colbyford
colbyford / SparkML_ModelSaver.py
Last active September 23, 2022 16:41
Save trained SparkML models to storage, then load a model and transform a new dataset.
########################################
## Title: Spark MLlib Model Saver
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
## Write Models to Blob
## Each model is saved to its own subdirectory (lr, rf, dt) of /mnt/trainedmodels.
## The three model variables are not defined in this snippet.
## NOTE(review): presumably they are the fitted cross-validated logistic regression,
## random forest, and decision tree models produced by the training scripts -- confirm.
## NOTE(review): if these are Spark ML models, save() is documented to raise when the
## target path already exists; model.write().overwrite().save(path) replaces an earlier
## save instead -- confirm which behavior is wanted before re-running.
lrcvModel.save("/mnt/trainedmodels/lr")
rfcvModel.save("/mnt/trainedmodels/rf")
dtcvModel.save("/mnt/trainedmodels/dt")
@colbyford
colbyford / SparkML_LinearRegression.py
Last active January 10, 2024 02:36
SparkML Linear Regression Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Linear Regression Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.regression import LinearRegression
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import RegressionEvaluator
@colbyford
colbyford / SparkML_RandomForest_Regression.py
Last active September 23, 2022 16:42
SparkML Random Forest Regression Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Random Forest Regression Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.regression import RandomForestRegressor
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import RegressionEvaluator
@colbyford
colbyford / SparkML_DecisionTree_Regression.py
Last active September 23, 2022 16:42
SparkML Decision Tree Regression Script with Cross-Validation and Parameter Sweep
########################################
## Title: Spark MLlib Decision Tree Regression Script, with Cross-Validation and Parameter Sweep
## Language: PySpark
## Author: Colby T. Ford, Ph.D.
########################################
from pyspark.ml.regression import DecisionTreeRegressor
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import RegressionEvaluator