Created
June 15, 2022 07:17
-
-
Save mwitiderrick/d4872140611529956083400313e23903 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Layer experiment-tracking setup: authenticate this session and bind it
# to the "experiment-tracking" project so subsequent layer.log() calls
# are recorded there.
import layer
layer.login()  # opens the Layer auth flow — presumably interactive; confirm in CI
layer.init("experiment-tracking")
from layer.decorators import model
@model("lgbm")
def train():
    """Train a HistGradientBoostingClassifier on synthetic data and log the
    run (parameters, metrics, ROC and precision-recall plots) to Layer.

    Returns:
        The fitted HistGradientBoostingClassifier, which Layer stores as
        the model artifact registered under the name "lgbm".
    """
    from sklearn.ensemble import HistGradientBoostingClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt
    from sklearn.metrics import (
        average_precision_score,
        roc_auc_score,
        roc_curve,
        precision_recall_curve,
    )

    layer.log({"Description": "Experiment tracking with Layer"})

    # Fixed seed so the synthetic dataset — and therefore every logged
    # metric — is reproducible across runs of this tracked experiment.
    X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    # Model hyperparameters.
    learning_rate = 0.01
    max_depth = 6
    min_samples_leaf = 10
    random_state = 42
    early_stopping = True

    # Log hyperparameters so runs are comparable in the Layer UI.
    layer.log({
        "min_samples_leaf": min_samples_leaf,
        "learning_rate": learning_rate,
        "random_state": random_state,
        "early_stopping": early_stopping,
        "max_depth": max_depth,
    })

    # Named `clf` (not `model`) to avoid shadowing the imported @model decorator.
    clf = HistGradientBoostingClassifier(
        learning_rate=learning_rate,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        early_stopping=early_stopping,
        random_state=random_state,
    )
    clf.fit(X_train, y_train)

    # Positive-class probabilities are what the ranking metrics below need.
    probs = clf.predict_proba(X_test)[:, 1]

    # Average precision and area under the ROC curve on the held-out split.
    avg_precision = average_precision_score(y_test, probs, pos_label=1)
    auc = roc_auc_score(y_test, probs)
    layer.log({"AUC": f'{auc:.4f}'})
    layer.log({"avg_precision": f'{avg_precision:.4f}'})

    # --- ROC curve ---
    plt.figure(figsize=(12, 8))
    plt.title("ROC Curve")
    # No-skill classifier traces the diagonal on a ROC plot.
    plt.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
    fpr, tpr, _ = roc_curve(y_test, probs)
    plt.plot(fpr, tpr, marker='.', label='GBM')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend()
    layer.log({"ROC Curve": plt.gcf()})

    # --- Precision-recall curve ---
    plt.figure(figsize=(12, 8))
    precision, recall, _ = precision_recall_curve(y_test, probs)
    # Bug fix: the no-skill PR baseline is the positive-class rate of the
    # *test* set (the data the curve is computed on), not of the full y.
    no_skill = len(y_test[y_test == 1]) / len(y_test)
    plt.title("Precision Recall curve")
    plt.plot([0, 1], [no_skill, no_skill], linestyle='--', label='No Skill')
    plt.plot(recall, precision, marker='.', label='GBM')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    layer.log({"Precision Recall": plt.gcf()})

    return clf
# Execute the decorated train() function through Layer so its parameter
# logs, metric logs, plots, and the returned model are captured remotely.
layer.run([train])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment