Pratik-Shukla-22/Mini_Batch_Gradient_Descent.py

## Mini_Batch_Gradient_Descent.py
#Fetch the data file from GitHub repository:
#NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line only
#runs inside a notebook; a plain Python interpreter rejects it as a syntax error.
!wget https://raw.githubusercontent.com/Pratik-Shukla-22/Gradient-Descent/main/Advertising.csv

#Import the required libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Read the data file:
data = pd.read_csv("Advertising.csv")
data.head()

#Define the feature and target variables:
X = data[["TV","radio","newspaper"]]
Y = data["sales"]

#Reshape Y into a column vector of shape (n_samples, 1) so that it lines up
#with the (n_samples, 1) predictions produced by predict():
Y = np.asarray(Y)
Y = np.reshape(Y,(Y.shape[0],1))

#Standardize the data to zero mean and unit variance:
#The parentheses around (Y - Y.mean()) are required; without them only the
#constant Y.mean()/Y.std() is subtracted and Y is never rescaled.
#NOTE: pandas .std() defaults to ddof=1 while NumPy .std() defaults to ddof=0,
#so X and Y are scaled with slightly different estimators.
X = (X - X.mean())/X.std()
Y = (Y - Y.mean())/Y.std()

#Function to get initial weights and bias:
def initialize_weights(n_features):
  """Return randomly initialised parameters for a linear model.

  Args:
    n_features: Number of input features, i.e. how many weights to create.

  Returns:
    Tuple ``(bias, weights)`` where ``bias`` has shape (1, 1) and ``weights``
    has shape (1, n_features). Values are drawn uniformly from [0, 1).
  """
  #Size the arrays from the argument (not from a global feature matrix) so the
  #function works for any feature count. The bias is drawn first, then the weights:
  bias = np.random.random((1, 1))
  weights = np.random.random((1, n_features))

  return bias,weights

#Compute the model output for the given parameters:
def predict(bias, weights, X):
  """Return the linear-model prediction ``bias + X . weights^T``.

  Args:
    bias: Intercept term, broadcast over every row of the result.
    weights: Row vector of coefficients; it is transposed before the product.
    X: Feature matrix with one example per row.

  Returns:
    The predictions; for X of shape (n_samples, n_features) and weights of
    shape (1, n_features) this is an (n_samples, 1) array.
  """
  weighted_sum = np.dot(X, weights.T)
  return bias + weighted_sum

#Mean squared error between targets and predictions:
def calculate_cost(Y, Y_pred):
  """Return the sum of squared errors divided by the number of rows.

  Args:
    Y: True target values.
    Y_pred: Predicted values, same shape as ``Y``.

  Returns:
    The scalar cost ``sum((Y_pred - Y)**2) / len(Y_pred - Y)``.
  """
  residuals = Y_pred - Y
  squared_total = np.sum(np.square(residuals))
  return squared_total / len(residuals)

#Take one gradient-descent step on the bias and weights:
def update_parameters(X,Y,Y_pred,bias,weights,lr):
  """Return the parameters after a single gradient-descent update.

  Args:
    X: Feature matrix of the current batch, one example per row.
    Y: True targets for the batch.
    Y_pred: Predictions for the batch made with the current parameters.
    bias: Current bias.
    weights: Current weights.
    lr: Learning rate (step size).

  Returns:
    Tuple ``(bias, weights)`` holding the updated values; the inputs are not
    modified in place.
  """
  residuals = Y_pred - Y
  n_rows = len(Y)

  #Gradients of the mean squared error with respect to bias and weights:
  grad_bias = (np.sum(residuals) * 2) / n_rows
  grad_weights = (np.dot(residuals.T, X) * 2) / n_rows

  #Step against the gradient, scaled by the learning rate:
  return bias - lr * grad_bias, weights - lr * grad_weights

#The main function to run the gradient descent algorithm:
def run_mini_batch_gradient_descent(X,Y,lr,iter,batch_size=20):
  """Fit a linear model with mini-batch gradient descent.

  Every iteration draws one random mini-batch (without replacement), records
  the MSE of the current parameters on that batch, and then takes a single
  gradient step computed from the same batch.

  Args:
    X: Feature matrix (DataFrame or array-like), shape (n_samples, n_features).
    Y: Targets, shape (n_samples, 1) or (n_samples,).
    lr: Learning rate.
    iter: Number of parameter updates to perform (one mini-batch each).
    batch_size: Examples per mini-batch. Capped at n_samples. Defaults to 20.

  Returns:
    Tuple ``(bias, weights, cost_list)`` with the final parameters and the
    per-iteration mini-batch cost.

  Raises:
    ValueError: If X is empty, X and Y differ in length, or batch_size < 1.
  """
  #Work on plain arrays so DataFrames and ndarrays are handled alike:
  X_values = np.asarray(X)
  Y_values = np.asarray(Y)
  if Y_values.ndim == 1:
    #A flat target would broadcast against (batch, 1) predictions into a
    #(batch, batch) matrix, so force a column vector.
    Y_values = Y_values.reshape(-1, 1)

  n_samples = len(X_values)
  if n_samples == 0:
    raise ValueError("X must contain at least one example")
  if len(Y_values) != n_samples:
    raise ValueError("X and Y must have the same number of examples")
  if batch_size < 1:
    raise ValueError("batch_size must be at least 1")
  #Never ask for more examples than exist:
  batch_size = min(batch_size, n_samples)

  #Create an empty list to store cost values:
  cost_list = []

  #Get the initial values of weights and bias:
  bias, weights = initialize_weights(X_values.shape[1])

  for _ in range(iter):
    #The first batch_size entries of a fresh permutation are a uniform random
    #sample without replacement:
    batch_indices = np.random.permutation(n_samples)[:batch_size]
    X_sample = X_values[batch_indices]
    Y_sample = Y_values[batch_indices]

    #Predict the value of the target variable:
    Y_pred = predict(bias, weights, X_sample)

    #Record the cost before the parameters are updated:
    cost_list.append(calculate_cost(Y_sample, Y_pred))

    #Update the parameters using gradient descent:
    bias, weights = update_parameters(X_sample,Y_sample,Y_pred,bias,weights,lr)

  #Return the final parameters and the cost history:
  return bias, weights,cost_list

#Run the gradient descent algorithm:
bias, weights,cost = run_mini_batch_gradient_descent(X,Y,lr=0.01,iter=200)

#Print the final values of weights:
print("Weights=",weights)

#Print the final value of bias:
print("Bias=",bias)

#Plot the graph of iter. vs cost:
plt.title("Iterations vs. Cost")
plt.xlabel("Iterations")
plt.ylabel("MSE cost")
#Draw the curve once, with its legend label; a second unlabelled
#plt.plot(cost) would only overdraw the same data in another colour.
plt.plot(cost,label="Mini Batch Gradient Descent")
plt.legend()
plt.show()

#Train two models that differ only in learning rate (1000 updates each):
bias1, weights1, cost1 = run_mini_batch_gradient_descent(X, Y, iter=1000, lr=0.01)
bias2, weights2, cost2 = run_mini_batch_gradient_descent(X, Y, iter=1000, lr=0.001)

#Overlay both cost curves on one figure so their convergence can be compared:
plt.plot(cost1, label="LR=0.01")
plt.plot(cost2, label="LR=0.001")
plt.title("Iterations vs. Cost")
plt.xlabel("Iterations")
plt.ylabel("MSE cost")
plt.legend()
plt.show()
	#Fetch the data file from GitHub repository:
	#NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line only
	#runs inside a notebook; a plain Python interpreter rejects it as a syntax error.
	!wget https://raw.githubusercontent.com/Pratik-Shukla-22/Gradient-Descent/main/Advertising.csv

	#Import the required libraries:
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt

	#Read the data file:
	data = pd.read_csv("Advertising.csv")
	data.head()

	#Define the feature and target variables:
	X = data[["TV","radio","newspaper"]]
	Y = data["sales"]

	#Reshape Y into a column vector of shape (n_samples, 1) so that it lines up
	#with the (n_samples, 1) predictions produced by predict():
	Y = np.asarray(Y)
	Y = np.reshape(Y,(Y.shape[0],1))

	#Standardize the data to zero mean and unit variance:
	#The parentheses around (Y - Y.mean()) are required; without them only the
	#constant Y.mean()/Y.std() is subtracted and Y is never rescaled.
	#NOTE: pandas .std() defaults to ddof=1 while NumPy .std() defaults to ddof=0,
	#so X and Y are scaled with slightly different estimators.
	X = (X - X.mean())/X.std()
	Y = (Y - Y.mean())/Y.std()

	#Function to get initial weights and bias:
	def initialize_weights(n_features):
	  """Return randomly initialised parameters for a linear model.

	  Args:
	    n_features: Number of input features, i.e. how many weights to create.

	  Returns:
	    Tuple ``(bias, weights)`` where ``bias`` has shape (1, 1) and ``weights``
	    has shape (1, n_features). Values are drawn uniformly from [0, 1).
	  """
	  #Size the arrays from the argument (not from a global feature matrix) so the
	  #function works for any feature count. The bias is drawn first, then the weights:
	  bias = np.random.random((1, 1))
	  weights = np.random.random((1, n_features))

	  return bias,weights

	#Compute the model output for the given parameters:
	def predict(bias, weights, X):
	  """Return the linear-model prediction ``bias + X . weights^T``.

	  Args:
	    bias: Intercept term, broadcast over every row of the result.
	    weights: Row vector of coefficients; it is transposed before the product.
	    X: Feature matrix with one example per row.

	  Returns:
	    The predictions; for X of shape (n_samples, n_features) and weights of
	    shape (1, n_features) this is an (n_samples, 1) array.
	  """
	  predicted_value = bias+np.dot(X,weights.T)
	  return predicted_value

	#Mean squared error between targets and predictions:
	def calculate_cost(Y, Y_pred):
	  """Return the sum of squared errors divided by the number of rows.

	  Args:
	    Y: True target values.
	    Y_pred: Predicted values, same shape as ``Y``.

	  Returns:
	    The scalar cost ``sum((Y_pred - Y)**2) / len(Y_pred - Y)``.
	  """
	  error = Y_pred - Y
	  cost = np.sum((error)**2)/len(error)
	  return cost

	#Take one gradient-descent step on the bias and weights:
	def update_parameters(X,Y,Y_pred,bias,weights,lr):
	  """Return the parameters after a single gradient-descent update.

	  Args:
	    X: Feature matrix of the current batch, one example per row.
	    Y: True targets for the batch.
	    Y_pred: Predictions for the batch made with the current parameters.
	    bias: Current bias.
	    weights: Current weights.
	    lr: Learning rate (step size).

	  Returns:
	    Tuple ``(bias, weights)`` holding the updated values; the inputs are not
	    modified in place.
	  """
	  #Gradients of the mean squared error with respect to bias and weights:
	  db = (np.sum(Y_pred-Y)*2)/len(Y)
	  dw = (np.dot((Y_pred-Y).T,X)*2)/len(Y)

	  #Step against the gradient, scaled by the learning rate:
	  bias = bias - lr*db
	  weights = weights - lr*dw

	  #Return the updated parameters:
	  return bias, weights

	#The main function to run the gradient descent algorithm:
	def run_mini_batch_gradient_descent(X,Y,lr,iter,batch_size=20):
	  """Fit a linear model with mini-batch gradient descent.

	  Every iteration draws one random mini-batch (without replacement), records
	  the MSE of the current parameters on that batch, and then takes a single
	  gradient step computed from the same batch.

	  Args:
	    X: Feature matrix (DataFrame or array-like), shape (n_samples, n_features).
	    Y: Targets, shape (n_samples, 1) or (n_samples,).
	    lr: Learning rate.
	    iter: Number of parameter updates to perform (one mini-batch each).
	    batch_size: Examples per mini-batch. Capped at n_samples. Defaults to 20.

	  Returns:
	    Tuple ``(bias, weights, cost_list)`` with the final parameters and the
	    per-iteration mini-batch cost.

	  Raises:
	    ValueError: If X is empty, X and Y differ in length, or batch_size < 1.
	  """
	  #Work on plain arrays so DataFrames and ndarrays are handled alike:
	  X_values = np.asarray(X)
	  Y_values = np.asarray(Y)
	  if Y_values.ndim == 1:
	    #A flat target would broadcast against (batch, 1) predictions into a
	    #(batch, batch) matrix, so force a column vector.
	    Y_values = Y_values.reshape(-1, 1)

	  n_samples = len(X_values)
	  if n_samples == 0:
	    raise ValueError("X must contain at least one example")
	  if len(Y_values) != n_samples:
	    raise ValueError("X and Y must have the same number of examples")
	  if batch_size < 1:
	    raise ValueError("batch_size must be at least 1")
	  #Never ask for more examples than exist:
	  batch_size = min(batch_size, n_samples)

	  #Create an empty list to store cost values:
	  cost_list = []

	  #Get the initial values of weights and bias:
	  bias, weights = initialize_weights(X_values.shape[1])

	  for _ in range(iter):
	    #The first batch_size entries of a fresh permutation are a uniform random
	    #sample without replacement:
	    batch_indices = np.random.permutation(n_samples)[:batch_size]
	    X_sample = X_values[batch_indices]
	    Y_sample = Y_values[batch_indices]

	    #Predict the value of the target variable:
	    Y_pred = predict(bias, weights, X_sample)

	    #Record the cost before the parameters are updated:
	    cost_list.append(calculate_cost(Y_sample, Y_pred))

	    #Update the parameters using gradient descent:
	    bias, weights = update_parameters(X_sample,Y_sample,Y_pred,bias,weights,lr)

	  #Return the final parameters and the cost history:
	  return bias, weights,cost_list

	#Run the gradient descent algorithm:
	bias, weights,cost = run_mini_batch_gradient_descent(X,Y,lr=0.01,iter=200)

	#Print the final values of weights:
	print("Weights=",weights)

	#Print the final value of bias:
	print("Bias=",bias)

	#Plot the graph of iter. vs cost:
	plt.title("Iterations vs. Cost")
	plt.xlabel("Iterations")
	plt.ylabel("MSE cost")
	#Draw the curve once, with its legend label; a second unlabelled
	#plt.plot(cost) would only overdraw the same data in another colour.
	plt.plot(cost,label="Mini Batch Gradient Descent")
	plt.legend()
	plt.show()

	#Train two models that differ only in learning rate (1000 updates each):
	bias1, weights1, cost1 = run_mini_batch_gradient_descent(X, Y, iter=1000, lr=0.01)
	bias2, weights2, cost2 = run_mini_batch_gradient_descent(X, Y, iter=1000, lr=0.001)

	#Overlay both cost curves on one figure so their convergence can be compared:
	plt.plot(cost1, label="LR=0.01")
	plt.plot(cost2, label="LR=0.001")
	plt.title("Iterations vs. Cost")
	plt.xlabel("Iterations")
	plt.ylabel("MSE cost")
	plt.legend()
	plt.show()