# Implements logistic regression with mini-batch gradient descent in
# TensorFlow, tested on the sklearn moons dataset
import tensorflow as tf
import numpy as np
from datetime import datetime
from sklearn.datasets import make_moons


def get_tb_dir(prefix=None) -> str:
    """Returns a string, built from the current time, that serves as the
    directory to save data for TensorBoard. Timestamping the directory keeps
    separate runs from being merged together in TensorBoard.
    """
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    return "{}/run-{}".format(prefix, now)


def log_reg_graph(X, y, n: int, learning_rate: float = 0.01):
    """Creates a tensorflow graph that can be used for a logistic regression.
    This can be reused multiple times.

    :param X: placeholder for the feature matrix (m x (n + 1), bias included)
    :param y: placeholder for the labels (m x 1)
    :param n: the number of features in X (before the bias column is added)
    :param learning_rate: the learning rate to use for gradient descent
    """
    with tf.name_scope("log_reg"):
        with tf.name_scope("variables"):
            w = tf.Variable(tf.random_uniform(
                [n + 1, 1], -1.0, 1.0), name="weights")
        with tf.name_scope("pred"):
            logits = tf.matmul(X, w, name="logits")
            y_pred = tf.sigmoid(logits, name="sigmoid")
        with tf.name_scope("loss"):
            log_loss = tf.losses.log_loss(y, y_pred)
            log_loss_summary = tf.summary.scalar("log_loss_sum", log_loss)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
            training_op = optimizer.minimize(log_loss)
        with tf.name_scope("init"):
            init = tf.global_variables_initializer()
        with tf.name_scope("save"):
            saver = tf.train.Saver()
    return w, logits, y_pred, log_loss, log_loss_summary, optimizer, \
        training_op, init, saver
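

# For reference, a minimal NumPy sketch (not part of the original gist) of the
# binary log loss that the graph above minimizes; this assumes the default
# behavior of tf.losses.log_loss (unit weights, mean reduction):
#     L = -mean(y * log(p) + (1 - y) * log(1 - p))
def np_log_loss(y_true, y_prob, eps=1e-7):
    """NumPy reference for the binary log loss; eps guards against log(0)."""
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))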


def get_batch(X, y, batch_size: int):
    """Given a matrix X and targets y, returns a mini-batch of "batch_size"
    rows sampled uniformly at random (with replacement).
    """
    indices = np.random.randint(0, len(X), size=batch_size)
    X_batch = X[indices].astype(dtype=np.float32)
    y_batch = y.reshape(-1, 1)[indices].astype(dtype=np.float32)
    return X_batch, y_batch
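

# Note that get_batch draws indices with replacement, so an "epoch" in the
# training loop below is n_batches random draws rather than one full pass over
# the data. A sketch of an alternative (not used by this script) that shuffles
# once per epoch and covers every row exactly once:
def shuffled_batches(X, y, batch_size: int):
    """Yields (X_batch, y_batch) pairs that cover the whole dataset once."""
    indices = np.random.permutation(len(X))
    y = y.reshape(-1, 1)
    for start in range(0, len(X), batch_size):
        batch = indices[start:start + batch_size]
        yield X[batch].astype(np.float32), y[batch].astype(np.float32)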


def log_reg_with_ckpt(X_data, y_data, X_test, y_test, n, n_epochs,
                      learning_rate, batch_size):
    """Executes a logistic regression on a given X with a given y. This logs
    summaries for TensorBoard and saves checkpoints, so a run can be resumed
    if execution is interrupted.
    """
    # directory prefix for tensorboard and checkpoints
    dir_pref = get_tb_dir("./log_reg")
    # number of mini-batches per epoch, based on the training set size
    n_batches = int(np.ceil(len(X_data) / batch_size))
    X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
    y = tf.placeholder(tf.float32, shape=(None, 1), name="y")

    # Create computation graph
    w, logits, y_pred, log_loss, log_loss_summary, optimizer, \
        training_op, init, saver = log_reg_graph(X, y, n, learning_rate)
    file_writer = tf.summary.FileWriter(dir_pref, tf.get_default_graph())

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = get_batch(X_data, y_data, batch_size)
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            if epoch % 100 == 0:
                print("Epoch:", epoch, " | log loss:", log_loss.eval(
                    feed_dict={X: X_test, y: y_test}))
                save_path = saver.save(
                    sess, "{}/logistic_model.ckpt".format(dir_pref))
            if epoch % 10 == 0:
                summary_str = log_loss_summary.eval(
                    feed_dict={X: X_test, y: y_test})
                file_writer.add_summary(summary_str, epoch)
        save_path = saver.save(
            sess, "{}/logistic_model_final.ckpt".format(dir_pref))
    file_writer.close()
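

# The checkpoints above make it possible to resume an interrupted run. A
# minimal sketch of restoring the most recent checkpoint; checkpoint_dir is
# assumed to be the same directory the trainer saved to (e.g. dir_pref):
def restore_latest(sess, saver, checkpoint_dir: str) -> bool:
    """Restores the latest checkpoint in checkpoint_dir; returns True if found."""
    ckpt_path = tf.train.latest_checkpoint(checkpoint_dir)
    if ckpt_path is None:
        return False
    saver.restore(sess, ckpt_path)
    return True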


def get_moons_data(m):
    """Retrieves the moons dataset and returns an X and a y."""
    X_moons, y_moons = make_moons(m, noise=0.1, random_state=42)
    return X_moons, y_moons


def main():
    # retrieve the data
    m = 1000
    X_data, y_data = get_moons_data(m)
    X_data_bias = np.c_[np.ones((m, 1)), X_data]
    m, n = X_data.shape  # need the shape for tensorflow placeholders

    # split the test/train data
    test_ratio = 0.2
    test_size = int(m * test_ratio)
    X_train = X_data_bias[:-test_size]
    X_test = X_data_bias[-test_size:]
    y_train = y_data[:-test_size]
    y_test = y_data[-test_size:]

    tf.reset_default_graph()

    # run the regression
    log_reg_with_ckpt(X_train, y_train, X_test, y_test.reshape(-1, 1),
                      n, n_epochs=2000, learning_rate=0.1, batch_size=100)
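

# The script only reports log loss; a hedged sketch for turning predicted
# probabilities (e.g. y_pred evaluated on X_test) into a test accuracy. The
# 0.5 threshold and the names here are illustrative, not part of the gist:
def accuracy(y_true, y_prob, threshold: float = 0.5) -> float:
    """Fraction of correct predictions given predicted probabilities."""
    y_hat = (np.asarray(y_prob).ravel() >= threshold).astype(int)
    return float(np.mean(y_hat == np.asarray(y_true).ravel()))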


if __name__ == "__main__":
    main()