@Henistein
Last active March 5, 2022 17:50
Neural network from scratch (numpy)
import numpy as np
import matplotlib.pyplot as plt

class HeniNet:
    def __init__(self, layers, lr=0.001, epochs=1000):
        self.X = None
        self.Y = None
        self.layers = layers
        self.lr = lr
        self.epochs = epochs
        self.loss_hist = list()
        # cached forward-pass values, filled in by forward()
        self.Z1 = None
        self.Z2 = None
        self.A1 = None
        self.A2 = None
    def init_weights(self):
        # weights: layer1 is (input, hidden), layer2 is (hidden, output), plus bias vectors
        np.random.seed(1)
        self.l1 = np.random.randn(self.layers[0], self.layers[1])
        self.b1 = np.random.randn(self.layers[1],)
        self.l2 = np.random.randn(self.layers[1], self.layers[2])
        self.b2 = np.random.randn(self.layers[2],)
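    # Illustrative shape note (not part of the original gist): with layers = (2, 8, 1),
    # l1 is (2, 8), b1 is (8,), l2 is (8, 1) and b2 is (1,), i.e. one hidden layer of 8 units.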
    def relu(self, Z):
        return np.maximum(0, Z)
    def derivative_relu(self, x):
        # 1 where x > 0, 0 elsewhere (boolean mask, broadcasts as 0/1)
        return (x > 0)
    def eta(self, x):
        # clip values to a small positive constant to avoid log(0)
        ETA = 0.0000000001
        return np.maximum(x, ETA)
    def sigmoid(self, Z):
        return 1/(1+np.exp(-Z))
    def derivative_sigmoid(self, Z):
        # expects Z to already be a sigmoid output: s'(x) = s(x) * (1 - s(x))
        return Z * (1-Z)
    def entropy_loss(self, y, yhat):
        # binary cross-entropy (log loss), averaged over the batch
        nsample = len(y)
        yhat_inv = 1.0 - yhat
        y_inv = 1.0 - y
        yhat = self.eta(yhat)  # clip values to avoid NaNs in log
        yhat_inv = self.eta(yhat_inv)
        loss = -1/nsample * (np.sum(np.multiply(np.log(yhat), y) + np.multiply(y_inv, np.log(yhat_inv))))
        return loss
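    # Worked example (illustrative, assumed values): for y = [1, 0] and yhat = [0.9, 0.2],
    # loss = -(1/2) * (log(0.9) + log(1 - 0.2)) ≈ 0.164.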
    def forward(self):
        # Forward pass:
        #   Z1 = (X @ W1) + b1
        #   A1 = relu(Z1)
        #   Z2 = (A1 @ W2) + b2
        #   A2 = sigmoid(Z2)
        #   loss = entropy_loss(Y, A2)
        Z1 = self.X.dot(self.l1) + self.b1
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.l2) + self.b2
        yhat = self.sigmoid(Z2)
        loss = self.entropy_loss(self.Y, yhat)
        # cache intermediate values for the backward pass
        self.Z1 = Z1
        self.Z2 = Z2
        self.A1 = A1
        return yhat, loss
    def backward(self, yhat):
        # Start from the output: derivative of the cross-entropy loss w.r.t. yhat
        #   dL/dyhat = -(y - yhat) / (yhat * (1 - yhat))
        d_yhat = np.divide(-(self.Y-yhat), (yhat*(1-yhat)))
        # Multiply by the derivative of the sigmoid to get the gradient at Z2
        d_sig = self.derivative_sigmoid(yhat)
        d_Z2 = d_yhat * d_sig
        # Z2 = A1 @ W2 + b2, so propagate the gradient to A1, W2 and b2
        d_A1 = d_Z2.dot(self.l2.T)
        d_l2 = self.A1.T.dot(d_Z2)
        d_b2 = np.sum(d_Z2, axis=0)
        # Back through the ReLU: dL/dZ1 = dL/dA1 * relu'(Z1)
        d_Z1 = d_A1 * self.derivative_relu(self.Z1)
        d_l1 = self.X.T.dot(d_Z1)
        d_b1 = np.sum(d_Z1, axis=0)
        # Gradient-descent update of the weights and biases
        self.l1 = self.l1 - self.lr * d_l1
        self.l2 = self.l2 - self.lr * d_l2
        self.b1 = self.b1 - self.lr * d_b1
        self.b2 = self.b2 - self.lr * d_b2
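    # Sanity note (not in the original gist): for a sigmoid output trained with cross-entropy,
    # the first two steps above collapse, since d_yhat * d_sig = -(Y - yhat) = yhat - Y.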
    def fit(self, X, Y):
        self.X = X
        self.Y = Y
        self.init_weights()
        for i in range(self.epochs):
            yhat, loss = self.forward()
            self.backward(yhat)
            self.loss_hist.append(loss)
    def predict(self, X_test, rnd=False):
        Z1 = X_test.dot(self.l1) + self.b1
        A1 = self.relu(Z1)
        Z2 = A1.dot(self.l2) + self.b2
        pred = self.sigmoid(Z2)
        if rnd:
            return np.round(pred)
        else:
            return pred
    def plot_loss(self):
        plt.plot(self.loss_hist)
        plt.xlabel("Iteration")
        plt.ylabel("logloss")
        plt.title("Loss curve for training")
        plt.show()
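A minimal usage sketch (not part of the original gist): it assumes a toy binary-classification problem (XOR here) and uses the HeniNet class defined above, with X shaped (n_samples, n_features) and Y shaped (n_samples, 1) to match fit and entropy_loss. The hyperparameters are illustrative and may need tuning for the network to fit XOR well.

import numpy as np

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)  # (4, 2) inputs
Y = np.array([[0], [1], [1], [0]], dtype=float)              # (4, 1) targets

net = HeniNet(layers=(2, 8, 1), lr=0.01, epochs=5000)  # (input, hidden, output) sizes
net.fit(X, Y)
print(net.predict(X, rnd=True))  # rounded 0/1 predictions
net.plot_loss()                  # loss curve recorded during fit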