Created
June 15, 2022 07:17
-
-
Save mwitiderrick/d4872140611529956083400313e23903 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Layer experiment-tracking setup: authenticate this session and bind it
# to the "experiment-tracking" project so subsequent layer.log() calls
# are recorded there.
import layer
layer.login()  # opens the Layer auth flow — presumably interactive; confirm in CI
layer.init("experiment-tracking")
from layer.decorators import model
@model("lgbm")
def train():
    """Train a HistGradientBoostingClassifier on synthetic data and log the
    run (parameters, metrics, ROC and precision-recall plots) to Layer.

    Returns:
        The fitted HistGradientBoostingClassifier, which Layer stores as
        the model artifact registered under the name "lgbm".
    """
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    from sklearn.metrics import (
        average_precision_score,
        roc_auc_score,
        roc_curve,
        precision_recall_curve,
    )

    layer.log({"Description": "Experiment tracking with Layer"})

    # Fixed seed so the synthetic dataset — and therefore every logged
    # metric — is reproducible across runs of this tracked experiment.
    X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    # Model hyperparameters.
    learning_rate = 0.01
    max_depth = 6
    min_samples_leaf = 10
    random_state = 42
    early_stopping = True

    # Log hyperparameters so runs are comparable in the Layer UI.
    layer.log({
        "min_samples_leaf": min_samples_leaf,
        "learning_rate": learning_rate,
        "random_state": random_state,
        "early_stopping": early_stopping,
        "max_depth": max_depth,
    })

    # Named `clf` (not `model`) to avoid shadowing the imported @model decorator.
    clf = HistGradientBoostingClassifier(
        learning_rate=learning_rate,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        early_stopping=early_stopping,
        random_state=random_state,
    )
    clf.fit(X_train, y_train)

    # Positive-class probabilities are what the ranking metrics below need.
    probs = clf.predict_proba(X_test)[:, 1]

    # Average precision and area under the ROC curve on the held-out split.
    avg_precision = average_precision_score(y_test, probs, pos_label=1)
    auc = roc_auc_score(y_test, probs)
    layer.log({"AUC": f'{auc:.4f}'})
    layer.log({"avg_precision": f'{avg_precision:.4f}'})

    # --- ROC curve ---
    plt.figure(figsize=(12, 8))
    plt.title("ROC Curve")
    # No-skill classifier traces the diagonal on a ROC plot.
    plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.plot(fpr, tpr, marker='.', label='GBM')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    layer.log({"ROC Curve": plt.gcf()})

    # --- Precision-recall curve ---
    plt.figure(figsize=(12, 8))
    precision, recall, _ = precision_recall_curve(y_test, probs)
    # Bug fix: the no-skill PR baseline is the positive-class rate of the
    # *test* set (the data the curve is computed on), not of the full y.
    no_skill = len(y_test[y_test == 1]) / len(y_test)
    plt.title("Precision Recall curve")
    plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
    plt.plot(recall, precision, marker='.', label='GBM')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    layer.log({"Precision Recall": plt.gcf()})

    return clf
# Execute the decorated train() function through Layer so its parameter
# logs, metric logs, plots, and the returned model are captured remotely.
layer.run([train])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment