import numpy as np
import pandas as pd
import tensorflow as tf
from math import ceil
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
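
# Toy binary-classification experiment: generate synthetic data whose label
# is 1 whenever the first feature exceeds 3 (a linearly separable "easy"
# target), then fit a small feed-forward network on it. For reproducible
# runs, one could seed np.random and TensorFlow here (a suggestion, not
# part of the original gist):
#
#     np.random.seed(0)
#     tf.set_random_seed(0)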
num_examples = 100000  # training set size
split = 0.2            # test set size as a fraction of the training set
num_features = 1
def generate_input_data(num_examples, num_features):
    features = []
    labels = []
    for i in range(num_examples):
        features.append(np.random.rand(num_features) * np.random.randint(1, 10) + np.random.rand(num_features))
        # ~10% of the time, zero out one randomly chosen feature.
        if np.random.randint(101) > 90:
            features[i][np.random.randint(num_features)] = 0
        # The "hard" target (parity of the rounded feature sum) is computed
        # but unused; the "easy" target is a threshold on the first feature.
        hard = ceil(np.sum(features[i])) % 2
        easy = 0
        if features[i][0] > 3:
            easy = 1
        labels.append(easy)
    df = pd.concat(
        [
            pd.DataFrame(features),
            pd.Series(labels).rename('labels')
        ],
        axis=1,
    )
    return df
def one_hot_encoding(train_df):
    # TODO: handle categorical feature one hot encoding.
    return 0, 0
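
# A minimal sketch of what the TODO above might become, assuming the
# categorical columns are the object-dtype columns of the frame (the
# column selection and the pandas.get_dummies choice are assumptions,
# not part of the original gist):
#
#     def one_hot_encoding(train_df):
#         categorical_columns = train_df.select_dtypes(include=['object']).columns.tolist()
#         encoded = pd.get_dummies(train_df, columns=categorical_columns)
#         return categorical_columns, encoded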
def scale_data(train_df, test_df):
    # Fit the scaler on the training features only, then apply the same
    # transform to both sets. The one-hot results are currently unused.
    categorical_columns, encoding = one_hot_encoding(train_df)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(train_df.drop(['labels'], axis=1))
    train_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(train_df.drop('labels', axis=1))),
            train_df['labels']
        ],
        axis=1,
    )
    test_df = pd.concat(
        [
            pd.DataFrame(scaler.transform(test_df.drop('labels', axis=1))),
            test_df['labels']
        ],
        axis=1,
    )
    return train_df, test_df
def preprocess_data(train_df, test_df):
    # Align both frames on the union of their columns, filling missing
    # columns with zeros, so train and test share an identical feature set.
    all_dfs = [train_df, test_df]
    features = set()
    for df in all_dfs:
        features |= set(df.columns)
    for df in all_dfs:
        for f in features:
            if f not in df.columns:
                df[f] = 0.0
    for df in all_dfs:
        df.sort_index(axis=1, inplace=True)
    train_df, test_df = scale_data(train_df, test_df)
    train_df = shuffle(train_df).reset_index(drop=True)
    return train_df, test_df
def get_data(num_examples, split):
    train_df = generate_input_data(num_examples, num_features)
    test_df = generate_input_data(int(ceil(num_examples * split)), num_features)
    return preprocess_data(train_df, test_df)
def get_batch(df, batch_size, epoch):
    # Each "epoch" here consumes a single, non-wrapping slice of the data,
    # so one epoch corresponds to one batch.
    start = batch_size * epoch - batch_size
    end = batch_size * epoch
    if end > len(df):
        end = len(df)
    size = end - start
    batch_x = df.drop('labels', axis=1)[start:end].values
    batch_y = df['labels'][start:end].values.reshape(size, 1)
    return batch_x, batch_y
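
# A hedged alternative (not in the original gist): a wrap-around slice
# would let training run for more "epochs" than one pass over the data,
# e.g.:
#
#     start = (batch_size * (epoch - 1)) % len(df)
#     end = min(start + batch_size, len(df))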
train_df, test_df = get_data(num_examples, split)
n_hidden_1 = 8      # units in the first hidden layer
n_hidden_2 = 4      # units in the second hidden layer
learning_rate = 0.01
batch_size = 500
num_epochs = 200    # 200 batches of 500 exactly cover the 100000 examples
display_epoch = 50
def neural_net(x):
    # Note: there is no activation between layers, so the stacked layers
    # collapse to a single affine map and the model is purely linear.
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
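
# If a nonlinear decision boundary is wanted, a common variant (an
# assumption, not part of the original gist; ReLU is one choice among
# many) wraps each hidden layer in tf.nn.relu:
#
#     layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
#     layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))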
weights = {
    'h1': tf.Variable(tf.random_normal([num_features, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([1]))
}
X = tf.placeholder(tf.float32, shape=(None, num_features))
Y = tf.placeholder(tf.float32, shape=(None, 1))
logits = neural_net(X)
# Sigmoid cross-entropy on the raw logits; predictions are sigmoid
# probabilities thresholded at 0.5.
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
predictions = tf.sigmoid(logits)
predicted_class = tf.greater(predictions, 0.5)
correct = tf.equal(predicted_class, tf.equal(Y, 1.0))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(1, num_epochs + 1):
        batch_x, batch_y = get_batch(train_df, batch_size, epoch)
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if epoch % display_epoch == 0 or epoch == 1:
            loss, acc, pred = sess.run([loss_op, accuracy, predictions],
                                       feed_dict={X: batch_x, Y: batch_y})
            # Show only the first 10 prediction/label pairs to keep output readable.
            c = ', '.join('{}={}'.format(*t) for t in zip(pred[:10], batch_y[:10]))
            print("[{}] Batch loss={:.4f}, Accuracy={:.5f}, Predictions vs labels= {}".format(epoch, loss, acc, c))
    print("Optimization Finished!")
    # Evaluate on the first test batch only (500 of the 20000 test examples).
    batch_x, batch_y = get_batch(test_df, batch_size, 1)
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y}))