Skip to content

Instantly share code, notes, and snippets.

@tomokishii
Last active September 24, 2021 17:35
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tomokishii/bc110ef7b5939491753151695e22e139 to your computer and use it in GitHub Desktop.
Save tomokishii/bc110ef7b5939491753151695e22e139 to your computer and use it in GitHub Desktop.
TensorFlow Logistic Regression

Logistic Regression Demo by TensorFlow

Logistic regression is the basic building block of recent "deep" neural network models. I revisited how to code logistic regression in TensorFlow in order to classify the Iris dataset.

  • Binary Classification problem - iris_lr.py
  • Multi-class Classification problem - iris_lr_softmax.py
#
# iris_lr.py
# date. 5/6/2016
# IRIS data set classification
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import tensorflow as tf
def prep_data(target_class=1, train_siz=120, test_siz=30):
    """Load the Iris data and build one-vs-rest labels for a single class.

    Reads ``iris.data`` (a headerless CSV: four measurements followed by the
    class name) from the current working directory, shuffles the rows with
    NumPy's global random state, and splits them into train and test parts.

    class:
        1. Iris Setosa, 2. Iris Versicolor, 3. Iris Virginica

    Args:
        target_class: 1-based number of the class that gets label 1.0; the
            other two classes get label 0.0.
        train_siz: number of shuffled rows placed in the training split. All
            remaining rows form the test split.
        test_siz: kept for interface compatibility; the split does not use it.

    Returns:
        Tuple ``(trX, trY, teX, teY)``. ``trX``/``teX`` are 2-D arrays holding
        the four feature columns, ``trY``/``teY`` are 1-D arrays of 0.0/1.0.

    Raises:
        ValueError: if ``target_class`` is not 1, 2 or 3. (Previously this
            case only printed a message and returned raw class indices as
            labels.)
    """
    if target_class not in (1, 2, 3):
        raise ValueError(
            'target_class must be 1, 2 or 3, got %r' % (target_class,))
    target_index = target_class - 1  # python indexing rule

    cols = ['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
    iris_df = pd.read_csv('iris.data', header=None, names=cols)

    # Encode class names as integer indices 0..2
    class_name = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    iris_df['iclass'] = [class_name.index(class_str)
                         for class_str in iris_df['class'].values]

    # Random shuffle before splitting into train/test
    perm = np.arange(len(iris_df))
    np.random.shuffle(perm)
    iris = iris_df[cols[:-1] + ['iclass']].values[perm]

    # One vs. all labels: target class --> 1.0, every other class --> 0.0
    iris[:, -1] = (iris[:, -1].astype(int) == target_index).astype(float)

    # Split dataset
    trX = iris[:train_siz, :-1]
    teX = iris[train_siz:, :-1]
    trY = iris[:train_siz, -1]
    teY = iris[train_siz:, -1]
    return trX, trY, teX, teY
def linear_model(X, w, b):
    """Build the affine op ``matmul(X, w) + b``; no activation is applied."""
    projected = tf.matmul(X, w)
    return projected + b
if __name__ == '__main__':
    # Binary (one vs. rest) problem: class 1 against the other two classes.
    tr_x, tr_y, te_x, te_y = prep_data(target_class=1)

    # Graph inputs: four features per row, one 0/1 label per row.
    x = tf.placeholder(tf.float32, [None, 4])
    y_ = tf.placeholder(tf.float32, [None, 1])
    p5 = tf.constant(0.5)  # threshold of Logistic Regression

    # Trainable parameters
    w = tf.Variable(tf.random_normal([4, 1], mean=0.0, stddev=0.05))
    b = tf.Variable([0.])

    y_pred = linear_model(x, w, b)  # logits
    # Sigmoid of the logits, used for prediction. Evaluating this tensor with
    # only `x` fed gives one value in (0, 1) per input row.
    y_pred_sigmoid = tf.sigmoid(y_pred)

    # NOTE(review): this call uses the pre-1.0 positional signature
    # (logits, targets). TensorFlow 1.x requires keyword arguments
    # (labels=..., logits=...) here -- confirm the target TF version.
    x_entropy = tf.nn.sigmoid_cross_entropy_with_logits(y_pred, y_)
    loss = tf.reduce_mean(x_entropy)
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

    # A row is correct when the sigmoid output lies within 0.5 of its label.
    delta = tf.abs(y_ - y_pred_sigmoid)
    correct_prediction = tf.cast(tf.less(delta, p5), tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    # Train
    init = tf.initialize_all_variables()

    # Full-batch training: the feed dict never changes, so build it once.
    # Labels arrive 1-D from prep_data; the placeholder expects a column.
    fd_train = {x: tr_x, y_: tr_y.reshape((-1, 1))}
    fd_test = {x: te_x, y_: te_y.reshape((-1, 1))}

    with tf.Session() as sess:
        sess.run(init)
        print('Training...')
        for i in range(5001):
            train_step.run(fd_train)
            if i % 500 == 0:
                loss_step = loss.eval(fd_train)
                train_accuracy = accuracy.eval(fd_train)
                print('  step, loss, accuracy = %6d: %8.3f,%8.3f' % (
                    i, loss_step, train_accuracy))

        # Test trained model
        print('accuracy = %10.4f' % accuracy.eval(fd_test))
#
# iris_lr_softmax.py
# date. 5/6/2016
# IRIS data set classification
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import tensorflow as tf
def prep_data(train_siz=120, test_siz=30):
    """Load the Iris data and build one-hot labels for the three classes.

    Reads ``iris.data`` (a headerless CSV: four measurements followed by the
    class name) from the current working directory, shuffles the rows with
    NumPy's global random state, and splits them into train and test parts.

    class:
        1. Iris Setosa, 2. Iris Versicolor, 3. Iris Virginica

    Args:
        train_siz: number of shuffled rows placed in the training split. All
            remaining rows form the test split.
        test_siz: kept for interface compatibility; the split does not use it.

    Returns:
        Tuple ``(trX, trY, teX, teY)``. ``trX``/``teX`` are 2-D arrays holding
        the four feature columns; ``trY``/``teY`` are float32 arrays with
        three columns, exactly one of which is 1.0 in every row.
    """
    cols = ['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']
    iris_df = pd.read_csv('iris.data', header=None, names=cols)

    # Encode class names as integer indices 0..2
    class_name = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
    iris_df['iclass'] = [class_name.index(class_str)
                         for class_str in iris_df['class'].values]

    # Random shuffle before splitting into train/test
    data_len = len(iris_df)
    perm = np.arange(data_len)
    np.random.shuffle(perm)
    iris = iris_df[cols[:-1] + ['iclass']].values[perm]

    # Generate one-hot label data. Each row i gets a single 1.0 in the column
    # of its class. (The previous `label[iclass] = 1.0` filled whole rows
    # 0..2 with ones and left every other row all zeros.)
    label = np.zeros((data_len, 3), dtype=np.float32)
    label[np.arange(data_len), iris[:, -1].astype(int)] = 1.0

    # Split dataset
    trX = iris[:train_siz, :-1]
    teX = iris[train_siz:, :-1]
    trY = label[:train_siz, :]
    teY = label[train_siz:, :]
    return trX, trY, teX, teY
def linear_model(X, w, b):
    """Return the op ``matmul(X, w) + b``; no activation is applied here."""
    weighted = tf.matmul(X, w)
    return weighted + b
if __name__ == '__main__':
    # Multi-class problem: labels come back as three-column arrays.
    tr_x, tr_y, te_x, te_y = prep_data()

    # Graph inputs: four features per row, three label columns per row.
    x = tf.placeholder(tf.float32, [None, 4])
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Trainable parameters
    w = tf.Variable(tf.random_normal([4, 3], mean=0.0, stddev=0.05))
    b = tf.Variable(tf.zeros([3]))

    y_pred = linear_model(x, w, b)  # logits
    y_pred_softmax = tf.nn.softmax(y_pred)  # for prediction

    # Cross entropy summed over the batch. The probabilities are clipped away
    # from zero first: a saturated softmax would otherwise yield log(0) = -inf
    # and 0 * -inf = NaN in the loss.
    clipped = tf.clip_by_value(y_pred_softmax, 1e-10, 1.0)
    loss = -tf.reduce_sum(y_ * tf.log(clipped))
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

    # A row is correct when the most probable class matches the label column.
    correct_prediction = tf.equal(tf.argmax(y_pred_softmax, 1),
                                  tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Train
    init = tf.initialize_all_variables()

    # Full-batch training: the feed dict never changes, so build it once.
    fd_train = {x: tr_x, y_: tr_y}
    fd_test = {x: te_x, y_: te_y}

    with tf.Session() as sess:
        sess.run(init)
        print('Training...')
        for i in range(10001):
            train_step.run(fd_train)
            if i % 1000 == 0:
                loss_step = loss.eval(fd_train)
                train_accuracy = accuracy.eval(fd_train)
                print('  step, loss, accuracy = %6d: %8.3f,%8.3f' % (
                    i, loss_step, train_accuracy))

        # Test trained model
        print('accuracy = %10.4f' % accuracy.eval(fd_test))
@anirbanpramanik
Copy link

Hi - thank you for the good tutorial. I have a question: for "Binary Classification problem - iris_lr.py", how do I extract the predicted probability for each row? I mean, we have 70 rows, and each "y" is associated with a certain probability, as in a traditional logistic regression model. How do I get that? Thanks for reading and replying.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment