@mwitiderrick · Created June 15, 2022
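"""Experiment tracking with Layer.

Trains a scikit-learn HistGradientBoostingClassifier on synthetic data and
logs hyperparameters, ROC AUC, average precision, and ROC / precision-recall
plots to a Layer project.
"""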
import layer

# Authenticate with Layer, then create or attach to a project for this run
layer.login()
layer.init("experiment-tracking")

from layer.decorators import model
# The @model decorator registers the function's return value as a model named "lgbm"
@model("lgbm")
def train():
    # Imports live inside the function so the code is self-contained
    # when Layer executes it remotely
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    from sklearn.metrics import (
        average_precision_score,
        roc_auc_score,
        roc_curve,
        precision_recall_curve,
    )

    layer.log({"Description": "Experiment tracking with Layer"})

    # Generate a synthetic binary classification dataset and split it
    X, y = make_classification(n_samples=1000, n_features=20)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    # Model parameters
    learning_rate = 0.01
    max_depth = 6
    min_samples_leaf = 10
    random_state = 42
    early_stopping = True

    # Log model parameters
    layer.log({
        "min_samples_leaf": min_samples_leaf,
        "learning_rate": learning_rate,
        "random_state": random_state,
        "early_stopping": early_stopping,
        "max_depth": max_depth,
    })

    # Model: define a HistGradientBoostingClassifier
    # (named clf so it does not shadow the imported @model decorator)
    clf = HistGradientBoostingClassifier(
        learning_rate=learning_rate,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        early_stopping=early_stopping,
        random_state=random_state,
    )

    # Fit the model and score the held-out test set
    clf.fit(X_train, y_train)
    probs = clf.predict_proba(X_test)[:, 1]  # probability of the positive class

    # Calculate average precision and area under the receiver operating
    # characteristic curve (ROC AUC)
    avg_precision = average_precision_score(y_test, probs, pos_label=1)
    auc = roc_auc_score(y_test, probs)
    layer.log({"AUC": f'{auc:.4f}'})
    layer.log({"avg_precision": f'{avg_precision:.4f}'})

    # ROC curve
    plt.figure(figsize=(12, 8))
    plt.title("ROC Curve")
    # Plot the no-skill ROC curve: a random classifier traces the diagonal
    plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
    # Calculate and plot the ROC curve for the model
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.plot(fpr, tpr, marker='.', label='GBM')
    # Axis labels and legend
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    layer.log({"ROC Curve": plt.gcf()})

    # Precision-recall curve
    plt.figure(figsize=(12, 8))
    precision, recall, _ = precision_recall_curve(y_test, probs)
    # A no-skill classifier's precision equals the proportion of positives
    no_skill = len(y[y == 1]) / len(y)
    plt.title("Precision Recall Curve")
    # Plot the no-skill precision-recall curve
    plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
    # Plot the model's precision-recall curve
    plt.plot(recall, precision, marker='.', label='GBM')
    # Axis labels and legend
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    layer.log({"Precision Recall": plt.gcf()})

    return clf
# Execute train() on Layer infrastructure and version the returned model as "lgbm"
layer.run([train])
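# Optionally fetch the trained model back for local inference. A minimal
# sketch, assuming the Layer SDK's layer.get_model() accessor and a
# "<project>/models/<name>" path format; the exact path for your account
# may differ, so treat both as assumptions to verify against Layer's docs.
fetched = layer.get_model("experiment-tracking/models/lgbm")
clf = fetched.get_train()  # the fitted HistGradientBoostingClassifier
# Score one dummy row with the 20 features used by make_classification above
print(clf.predict_proba([[0.0] * 20])[:, 1])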