import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def traindata(num_per_class=100):
    """Generate a shuffled, synthetic three-class training set.

    Every class is a 2-D Gaussian blob with standard deviation 1 on both
    axes, centred at (1, 1), (5, 4) and (8, 0) for classes 0, 1 and 2.

    Args:
        num_per_class: Number of points drawn for each class. Defaults to
            100, the value that used to be hard-coded.

    Returns:
        A tuple ``(xs, labels)``. ``xs`` is a float ndarray of shape
        ``(3 * num_per_class, 2)``; ``labels`` is an ``np.matrix`` of shape
        ``(3 * num_per_class, 3)`` whose rows are one-hot class indicators.
        Both have been permuted with the same random row order.
    """
    # (x1 mean, x2 mean) for each class; the position is the class id.
    centers = ((1, 1), (5, 4), (8, 0))

    # Draw x1 and then x2 for one class before moving to the next, so the
    # sequence of RNG calls (and any seeded output) matches the previous
    # hand-unrolled version.
    blobs = []
    for x1_mean, x2_mean in centers:
        x1 = np.random.normal(x1_mean, 1, (num_per_class, 1))
        x2 = np.random.normal(x2_mean, 1, (num_per_class, 1))
        blobs.append(np.hstack((x1, x2)))
    xs = np.vstack(blobs)

    # num_per_class copies of each identity row give the one-hot labels.
    # np.matrix is kept only so the return type stays what callers already
    # receive.
    labels = np.matrix(np.repeat(np.eye(len(centers)), num_per_class, axis=0))

    # Apply one shared permutation so points and labels stay aligned.
    order = np.arange(xs.shape[0])
    np.random.shuffle(order)
    return xs[order, :], labels[order, :]

def testdata(num_per_class=10):
    """Generate a synthetic three-class test set, ordered by class.

    Points are drawn from 2-D Gaussian blobs with standard deviation 1 on
    both axes, centred at (1, 1), (5, 4) and (8, 0) for classes 0, 1 and 2.
    The rows are not shuffled: all class-0 rows come first, then class 1,
    then class 2.

    Args:
        num_per_class: Number of points drawn for each class. Defaults to
            10, the value that used to be hard-coded.

    Returns:
        A tuple ``(test_xs, test_labels)``. ``test_xs`` is a float ndarray
        of shape ``(3 * num_per_class, 2)``; ``test_labels`` is an
        ``np.matrix`` of shape ``(3 * num_per_class, 3)`` with one-hot rows.
    """
    # (x1 mean, x2 mean) for each class; the position is the class id.
    centers = ((1, 1), (5, 4), (8, 0))

    # Draw x1 and then x2 per class, in class order, so the RNG call
    # sequence matches the previous hand-unrolled version.
    blobs = []
    for x1_mean, x2_mean in centers:
        x1 = np.random.normal(x1_mean, 1, (num_per_class, 1))
        x2 = np.random.normal(x2_mean, 1, (num_per_class, 1))
        blobs.append(np.hstack((x1, x2)))
    test_xs = np.vstack(blobs)

    # The label count is tied to the same parameter as the draws, so points
    # and labels cannot get out of step. np.matrix is kept only so the
    # return type stays what callers already receive.
    test_labels = np.matrix(
        np.repeat(np.eye(len(centers)), num_per_class, axis=0)
    )
    return test_xs, test_labels

# Hyperparameters for the softmax-regression run below.
learning_rate, training_epochs = 0.01, 1000
num_labels, batch_size = 3, 100

# Build the datasets; the training set is generated before the test set.
(train, train_label), (test, test_label) = traindata(), testdata()

# Rows are samples, columns are input features.
train_size, num_features = train.shape[0], train.shape[1]

# Model graph: a single linear layer followed by softmax.
# NOTE(review): `tf` is not imported in the visible part of this file;
# presumably `import tensorflow as tf` (TF 1.x API) is expected — confirm.

# Per-batch inputs; the leading dimension is left open (None).
X = tf.placeholder("float", shape=[None, num_features])
Y = tf.placeholder("float", shape=[None, num_labels])

# Parameters start at zero.
W = tf.Variable(tf.zeros([num_features, num_labels]))
b = tf.Variable(tf.zeros([num_labels]))

logits = tf.matmul(X, W) + b
y_model = tf.nn.softmax(logits)

# Cross-entropy summed over the batch. log_softmax evaluates
# log(softmax(logits)) in a numerically stable way; taking tf.log of the
# softmax output instead gives -inf once a probability underflows to 0, and
# 0 * -inf then turns the whole cost into NaN.
cost = -tf.reduce_sum(Y * tf.nn.log_softmax(logits))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Fraction of rows whose highest-scoring class matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Train with mini-batch gradient descent, then report the learned parameters
# and the accuracy on the test set.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    total_steps = training_epochs * train_size // batch_size
    for step in range(total_steps):
        # Walk through the training rows in order, wrapping back to the
        # start once the end of the set is reached.
        offset = (step * batch_size) % train_size
        batch_end = offset + batch_size
        batch_xs = train[offset:batch_end, :]
        batch_labels = train_label[offset:batch_end]
        err, _ = sess.run(
            [cost, train_op],
            feed_dict={X: batch_xs, Y: batch_labels},
        )
        print(step, err)

    W_val, b_val = sess.run([W, b])
    print('w', W_val)
    print('b', b_val)

    test_accuracy = sess.run(accuracy, feed_dict={X: test, Y: test_label})
    print("accuracy", test_accuracy)