# Implementation of a simple MLP network with one hidden layer. Tested on the iris data set.
# Requires: numpy, sklearn>=0.18.1, tensorflow>=1.0

# NOTE: In order to make the code simple, we rewrite x * W_1 + b_1 = x' * W_1'
# where x' = [x | 1] and W_1' is the matrix W_1 appended with a new row with elements b_1's.
# Similarly, for h * W_2 + b_2
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

RANDOM_SEED = 42
tf.set_random_seed(RANDOM_SEED)

def init_weights(shape):
    """ Weight initialization """
    weights = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(weights)

def forwardprop(X, w_1, w_2):
    """
    Forward-propagation.
    IMPORTANT: yhat is not softmax since TensorFlow's softmax_cross_entropy_with_logits() does that internally.
    """
    h = tf.nn.sigmoid(tf.matmul(X, w_1))  # The \sigma function
    yhat = tf.matmul(h, w_2)              # The \varphi function
    return yhat

def get_iris_data():
    """ Read the iris data set and split them into training and test sets """
    iris = datasets.load_iris()
    data = iris["data"]
    target = iris["target"]

    # Prepend the column of 1s for bias
    N, M = data.shape
    all_X = np.ones((N, M + 1))
    all_X[:, 1:] = data

    # Convert into one-hot vectors
    num_labels = len(np.unique(target))
    all_Y = np.eye(num_labels)[target]  # One liner trick!
    return train_test_split(all_X, all_Y, test_size=0.33, random_state=RANDOM_SEED)

def main():
    train_X, test_X, train_y, test_y = get_iris_data()

    # Layer's sizes
    x_size = train_X.shape[1]   # Number of input nodes: 4 features and 1 bias
    h_size = 256                # Number of hidden nodes
    y_size = train_y.shape[1]   # Number of outcomes (3 iris flowers)

    # Symbols
    X = tf.placeholder("float", shape=[None, x_size])
    y = tf.placeholder("float", shape=[None, y_size])

    # Weight initializations
    w_1 = init_weights((x_size, h_size))
    w_2 = init_weights((h_size, y_size))

    # Forward propagation
    yhat = forwardprop(X, w_1, w_2)
    predict = tf.argmax(yhat, axis=1)

    # Backward propagation
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=yhat))
    updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

    # Run SGD
    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    for epoch in range(100):
        # Train with each example
        for i in range(len(train_X)):
            sess.run(updates, feed_dict={X: train_X[i: i + 1], y: train_y[i: i + 1]})

        train_accuracy = np.mean(np.argmax(train_y, axis=1) ==
                                 sess.run(predict, feed_dict={X: train_X, y: train_y}))
        test_accuracy = np.mean(np.argmax(test_y, axis=1) ==
                                sess.run(predict, feed_dict={X: test_X, y: test_y}))

        print("Epoch = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
              % (epoch + 1, 100. * train_accuracy, 100. * test_accuracy))

    sess.close()

if __name__ == '__main__':
    main()
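The NOTE at the top of the script folds each bias vector into the corresponding weight matrix by appending a constant 1 to the input. A quick NumPy check of that identity, using hypothetical small shapes just for illustration:

```python
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(1, 4)      # one sample with 4 features
W_1 = rng.randn(4, 3)    # weights for a 3-unit layer
b_1 = rng.randn(3)       # bias vector

x_prime = np.hstack([x, np.ones((1, 1))])   # x' = [x | 1]
W_1_prime = np.vstack([W_1, b_1])           # W_1' = W_1 with b_1 appended as a new row

# Both expressions compute the same affine map.
assert np.allclose(x @ W_1 + b_1, x_prime @ W_1_prime)
```

The gist applies the same trick by prepending a column of 1s to the iris features in get_iris_data(), so the first row of w_1 plays the role of the input-layer bias.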
Thanks! Really helpful!
Thanks for the example! I don't see the bias term for yhat when you estimate the logits; am I missing something here? Also, can you explain why you did not use nn.relu as the activation here?
The biases are created by init_weights, in what appears to be a normal-random distribution. The activation is a smooth sigmoid, which is appropriate given float inputs.
Hi! I'm trying to learn TensorFlow, and this is the cleanest code I've seen so far. I'm currently trying it on my own data, but for some reason I'm getting this error:

ValueError: Cannot feed value of shape (1,) for Tensor u'Placeholder_4:0', which has shape '(?, 1)'

from

ValueError                          Traceback (most recent call last)
<ipython-input-63-586ae77f0bbb> in <module>()
      2 # train with each example
      3 for i in range(len(x_train)):
----> 4     sess.run(updates, feed_dict={x: x_train[i: i+1], y: y_train[i: i+1]})
x_size = 10
y_size = 1
h_size = 256
The dataset contains
- over 600k rows
- target = 1 column (isFraud) = 1/0
- data = 9 variables + 1 bias
Hope you can point me to where I'm making the mistake. Thanks!
How big is your feature size? You might have to set h_size according to your feature size for the 600K rows.
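The error message suggests the target array is 1-D, so y_train[i: i+1] has shape (1,) while the y placeholder expects (?, 1). A minimal sketch of two ways to reconcile the shapes, assuming a hypothetical 1-D 0/1 target called y_train:

```python
import numpy as np

# Hypothetical 1-D binary target, e.g. the isFraud column.
y_train = np.array([0, 1, 1, 0])

# Option 1: keep a single output unit (y_size = 1) and feed a column vector.
y_train_col = y_train.reshape(-1, 1)          # shape (N, 1) matches a (?, 1) placeholder

# Option 2: one-hot encode, as the gist does for iris (y_size = 2 for a 0/1 target).
num_labels = len(np.unique(y_train))
y_train_onehot = np.eye(num_labels)[y_train]  # shape (N, 2)

print(y_train_col.shape, y_train_onehot.shape)
```

Note that with y_size = 1 the gist's softmax loss degenerates (softmax over a single logit is always 1), so the single-column route would also need tf.nn.sigmoid_cross_entropy_with_logits, whereas the one-hot route keeps the gist's loss unchanged.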
Why is the accuracy higher in the test data than in the training data? Normally it's the other way around: It trained on the training data, so it should be better at that than the test data, which it's never seen until you measure its accuracy.
And why are there only four input features (plus the bias term)? If these are pictures of flowers, then each pixel ought to be its own feature.
I am not sure, but do you include a bias for the hidden layer? It seems that you just multiply x by w1 to get the hidden layer.
Agree with @MoleOrbitalHybridAnalyst. Biases do not need to be added as an additional column to the input vector, but as an additional term, as in the following: h = tf.nn.sigmoid(tf.add(tf.matmul(X, w_1), b))
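A minimal sketch of that alternative, with explicit bias variables instead of the column of 1s. The names b_1 and b_2 are not in the original gist, and the small sizes are hypothetical (TF 1.x API, as in the gist):

```python
import tensorflow as tf

x_size, h_size, y_size = 5, 256, 3   # same layer sizes as the iris example above

X = tf.placeholder("float", shape=[None, x_size])
w_1 = tf.Variable(tf.random_normal((x_size, h_size), stddev=0.1))
b_1 = tf.Variable(tf.zeros([h_size]))          # explicit hidden-layer bias
w_2 = tf.Variable(tf.random_normal((h_size, y_size), stddev=0.1))
b_2 = tf.Variable(tf.zeros([y_size]))          # explicit output bias

h = tf.nn.sigmoid(tf.add(tf.matmul(X, w_1), b_1))   # hidden layer with its own bias
yhat = tf.add(tf.matmul(h, w_2), b_2)               # logits with an output bias
```

With this variant, the column of 1s added in get_iris_data() and the extra input node become unnecessary.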
I have a dataset with 5 columns; I am feeding the first 3 columns as my inputs and the other 2 columns as my outputs. I have successfully executed the program, but I am not sure how to test the model by giving my own values as input and getting a predicted output from the model.
Can anyone please help me: how can I actually test the model with my own values after training is done?
At the end, you print the accuracy, but how would you print the predicted label in this case?
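A minimal sketch of one way to do both of these things, reusing the sess, predict, X, and test_X names from the gist above; it would have to run inside main() before sess.close(), and new_sample is a hypothetical input (a leading 1 for the bias column, then the 4 iris features):

```python
# Hypothetical new sample: bias column first, then sepal/petal measurements.
new_sample = np.array([[1.0, 5.1, 3.5, 1.4, 0.2]])

# predict is tf.argmax(yhat, axis=1), so this returns the predicted class index.
predicted_class = sess.run(predict, feed_dict={X: new_sample})
print("Predicted label:", predicted_class[0])   # 0, 1, or 2 for the three iris species

# Predicted labels for the whole test set instead of just the accuracy:
test_predictions = sess.run(predict, feed_dict={X: test_X})
print(test_predictions)
```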
Nice work, very useful!
@JeffreyBenjaminBrown The data set does not contain pictures of flowers; it contains features of the flowers, such as the length and width of the sepals and petals in centimetres, hence there are 4 features.
Thanks for the code, but this code only works for classification.
Lines 42 and 43 convert the target column into multiple columns based on its unique values, so how can I use this for regression?
I tried changing the code, but it doesn't work: after lines 75 to 86, all outputs are 0 only.
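A minimal sketch of one way to adapt the gist for regression, reusing forwardprop, X, w_1, and w_2 from the code above; y_raw is a hypothetical 1-D array of continuous target values:

```python
# Regression variant (sketch): no one-hot encoding of the target.
all_Y = y_raw.reshape(-1, 1)                 # shape (N, 1) instead of one-hot columns

y_size = 1                                   # single continuous output
y = tf.placeholder("float", shape=[None, y_size])

yhat = forwardprop(X, w_1, w_2)              # keep the linear output, no softmax
cost = tf.reduce_mean(tf.square(yhat - y))   # mean squared error instead of softmax cross-entropy
updates = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

# predict = tf.argmax(yhat, axis=1) no longer applies; the prediction is yhat itself,
# and the accuracy computation should be replaced by an error metric such as RMSE.
```

The "all outputs are 0" symptom is exactly what keeping predict = tf.argmax(yhat, axis=1) produces with a single output column, since the argmax of a length-1 vector is always 0.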
> Thanks for the example! I don't see the bias term for yhat when you estimate the logits; am I missing something here? Also, can you explain why you did not use nn.relu as the activation here?

I think in the forwardprop function he uses the sigmoid activation function; also, the bias term is already taken care of by TensorFlow.
Code updated for scikit-learn>=0.18 and tensorflow>=1.0