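"""Train a small TensorFlow DNNClassifier on per-character feature files.

Assumptions inferred from the code (the original gist has no docs): the
./normalized/ directory holds one training file per sample, named like
a_0.txt or b_3.txt, each expected to contain 51 integer lines (one per
feature); labels map a -> 0, b -> 1, anything else -> 2. Three held-out
samples live in c.txt, a.txt and b.txt. Written against the TF 1.x-era
Estimator / feature_column API.
"""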
import os
import re
import numpy as np
import tensorflow as tf


def main():
    path = './normalized/'
    # data[i] collects the value of feature i across every training file.
    data = []
    for i in range(51):
        data.append([])
    labels = []
    for f in os.listdir(path):
        m = re.match(r'(\w)_\d\.txt', f)
        if m is None:
            continue
        char = m.group(1)
        with open(os.path.join(path, f), 'r') as lines:
            i = 0
            for line in lines:
                data[i].append(int(line.rstrip()))
                i += 1
        labels.append(0 if char == 'a' else 1 if char == 'b' else 2)
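
    # Note the orientation: samples are columns and features are rows,
    # which is why each feature index becomes its own key below.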

    # Build the feature dict and feature columns for the estimator:
    # one numeric column per feature index.
    features = {}
    feature_columns = []
    for i in range(len(data)):
        key = str(i)
        features[key] = np.array(data[i])
        feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Build a 2-hidden-layer DNN with 10 units in each layer.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        # Two hidden layers of 10 nodes each.
        hidden_units=[10, 10],
        # The model must choose between 3 classes.
        n_classes=3)
    #classifier = tf.estimator.LinearClassifier(
    #    feature_columns=feature_columns,
    #    # The model must choose between 3 classes.
    #    n_classes=3)

    batch_size = 100
    train_steps = 1000

    # Train the model.
    classifier.train(
        input_fn=lambda: train_input_fn(features, np.array(labels),
                                        batch_size),
        steps=train_steps)

    # Evaluate the model on the training data (there is no separate split).
    eval_result = classifier.evaluate(
        input_fn=lambda: eval_input_fn(features, np.array(labels),
                                       batch_size))
    # evaluate() returns a dict of metrics; format(**eval_result) reads
    # its 'accuracy' entry.
    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

    # Read the three held-out samples. Each file contributes one value per
    # feature row, so test_data[i] ends up holding feature i for all three
    # samples.
    test_data = []
    test_labels = ['2', '0', '1']  # expected classes for c.txt, a.txt, b.txt
    for i in range(51):
        test_data.append([])
    for name in ('c.txt', 'a.txt', 'b.txt'):
        with open(os.path.join(path, name), 'r') as lines:
            i = 0
            for line in lines:
                test_data[i].append(int(line.rstrip()))
                i += 1

    test_features = {}
    for i in range(len(test_data)):
        test_features[str(i)] = np.array(test_data[i])

    # predict() yields one dict per sample, with 'class_ids' and
    # 'probabilities' entries.
    predictions = classifier.predict(
        input_fn=lambda: eval_input_fn(test_features,
                                       labels=None,
                                       batch_size=batch_size))
    template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'
    for pred_dict, expec in zip(predictions, test_labels):
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print(template.format(class_id, 100 * probability, expec))


def train_input_fn(features, labels, batch_size):
    """An input function for training."""
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # Repeat and batch the examples; repeat() keeps the dataset from being
    # exhausted before the estimator has pulled `train_steps` batches.
    #dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    dataset = dataset.repeat().batch(batch_size)
    # Return the dataset.
    return dataset


def eval_input_fn(features, labels, batch_size):
    """An input function for evaluation or prediction."""
    features = dict(features)
    if labels is None:
        # No labels, use only features.
        inputs = features
    else:
        inputs = (features, labels)
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    # Batch the examples.
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)
    # Return the dataset.
    return dataset
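

# Untested sketch (not part of the original gist): the same 2x10 network
# expressed with tf.keras, which supersedes the Estimator API in TF 2.x.
# It takes a single (num_samples, 51) float array instead of 51 separate
# feature columns; the function name and hyperparameters are assumptions.
def build_keras_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation='relu', input_shape=(51,)),
        tf.keras.layers.Dense(10, activation='relu'),
        # Three-way softmax mirrors n_classes=3 above.
        tf.keras.layers.Dense(3, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model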


if __name__ == '__main__':
    main()