Last active
March 30, 2020 23:02
-
-
Save patrickvossler18/425636471ca04d7cfea225a64a6cae61 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example taken from https://www.tensorflow.org/tutorials/estimator/premade | |
import numpy as np
import pandas as pd
import tensorflow as tf
# Local copies of the Iris training and test CSV files read below.
IRIS_TRAINING = "~/Downloads/iris_training.csv"
IRIS_TEST = "~/Downloads/iris_test.csv"

# Column names applied to both CSV files; 'Species' is the label column.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
# Class names, indexed by the predicted class id when results are printed.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

# header=0 tells pandas that the first row of each file is a header row;
# passing names= replaces that row's labels with CSV_COLUMN_NAMES.
training_set = pd.read_csv(IRIS_TRAINING, names=CSV_COLUMN_NAMES, header=0)
test_set = pd.read_csv(IRIS_TEST, names=CSV_COLUMN_NAMES, header=0)

# pop() removes the label column from each frame and returns it, so the
# frames keep only the feature columns.
train_y = training_set.pop('Species')
test_y = test_set.pop('Species')

# The label column has now been removed from the features.
# A bare `training_set.head()` only displays output in a notebook; print it so
# the preview is also visible when this file is run as a script.
print(training_set.head())
# Now that tf.contrib has been removed from newer versions of TensorFlow, training our model takes a few more steps.
# Because we already have the data set up, we can define a model using a TensorFlow Estimator.
# An Estimator is any class derived from tf.estimator.Estimator.
# TensorFlow provides a collection of pre-made Estimators in tf.estimator (for example, LinearRegressor) that implement common ML algorithms.
# To write a TensorFlow program based on pre-made Estimators, you must perform the following tasks:
#
# - Create one or more input functions.
# - Define the model's feature columns.
# - Instantiate an Estimator, specifying the feature columns and various hyperparameters.
# - Call one or more methods on the Estimator object, passing the appropriate input function as the source of the data.
#
# Let's see how those tasks are implemented for Iris classification.
# CREATE INPUT FUNCTIONS
# You must create input functions to supply data for training, evaluation, and prediction.
#
# An input function is a function that returns a tf.data.Dataset object which outputs the following two-element tuple:
#
# - features: A Python dictionary in which:
#     - Each key is the name of a feature.
#     - Each value is an array containing all of that feature's values.
# - label: An array containing the values of the label for every example.
#
# Just to demonstrate the format of the input function, here's a simple implementation:
def input_evaluation_set():
    """Return a tiny hard-coded (features, labels) pair showing the input format.

    Returns:
        A two-element tuple ``(features, labels)``. ``features`` is a dict
        mapping each of the four feature names to a NumPy array holding that
        feature's value for two examples; ``labels`` is a NumPy array with one
        class id per example.
    """
    features = {
        'SepalLength': np.array([6.4, 5.0]),
        'SepalWidth': np.array([2.8, 2.3]),
        'PetalLength': np.array([5.6, 3.3]),
        'PetalWidth': np.array([2.2, 1.0]),
    }
    labels = np.array([2, 1])
    return features, labels
def input_fn(features, labels, training=True, batch_size=256, shuffle_buffer=1000):
    """Build a batched tf.data.Dataset for training or evaluating.

    NOTE: a second function named ``input_fn`` (for prediction, without
    labels) is defined later in this file and rebinds this name from that
    point on.

    Args:
        features: Mapping of feature name to values; it is passed through
            ``dict()``, and this script supplies a pandas DataFrame.
        labels: Label values, one per example in ``features``.
        training: When true, shuffle the examples and repeat them
            indefinitely; otherwise make a single ordered pass.
        batch_size: Number of examples per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle`` in training
            mode. Defaults to 1000, the value that used to be hard-coded.

    Returns:
        A ``tf.data.Dataset`` of ``(features_dict, labels)`` batches.
    """
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle and repeat if you are in training mode.
    if training:
        dataset = dataset.shuffle(shuffle_buffer).repeat()

    return dataset.batch(batch_size)
# DEFINE THE FEATURE COLUMNS
# A feature column is an object describing how the model should use raw input data from the features dictionary.
# When you build an Estimator model, you pass it a list of feature columns that describes each of the features you want the model to use.
# The tf.feature_column module provides many options for representing data to the model.
#
# For Iris, the 4 raw features are numeric values, so we'll build a list of feature columns to tell the Estimator model to represent each of the four features as 32-bit floating-point values.
# Therefore, the code to create the feature columns is:
# Feature columns describe how to use the input.
# One numeric feature column per column left in training_set (the 'Species'
# label column was popped off when the data was loaded).
my_feature_columns = [
    tf.feature_column.numeric_column(key=key) for key in training_set.keys()
]
# INSTANTIATE AN ESTIMATOR
# Build a DNN with 3 hidden layers with 10, 20, and 10 hidden nodes each.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 20, 10],
    # The model must choose between the 3 classes listed in SPECIES; deriving
    # the count from the list keeps the two from drifting apart.
    n_classes=len(SPECIES))
# TRAIN, EVALUATE, PREDICT
# Train the Model.
# The lambda is a zero-argument callable that builds the training dataset from
# training_set / train_y when it is invoked.
classifier.train(
    input_fn=lambda: input_fn(training_set, train_y, training=True),
    steps=5000)
# Note that we wrap our input_fn call in a lambda to capture the arguments while providing an input function
# that takes no arguments, as the Estimator expects.
# The steps argument tells the method to stop training after the given number of training steps.
# EVALUATE THE TRAINED MODEL
# Now that the model has been trained, we can get some statistics on its performance.
# The following code block evaluates the accuracy of the trained model on the test data:
# training=False makes input_fn skip shuffling and repeating, so the test set
# is read once, in order.
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test_set, test_y, training=False))

# eval_result is unpacked as keyword arguments; only its 'accuracy' entry is
# used by the format string.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
# You now have a trained model that produces good evaluation results.
# You can now use the trained model to predict the species of an Iris flower based on some unlabeled measurements.
# As with training and evaluation, you make predictions using a single function call:
# Generate predictions from the model
# NOTE: These test points are different from those used in lecture.
# To make sure you understand what's going on, try adding the test points used in lecture.
# Species expected for each of the three unlabeled examples below, in order.
expected = ['Setosa', 'Versicolor', 'Virginica']

# Unlabeled measurements to classify: each key is a feature name and each list
# holds that feature's value for the three examples.
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}
# NOTE: this reuses the name of the training/evaluation input_fn defined
# earlier in the file, so from here on `input_fn` refers to this label-free
# version.
def input_fn(features, batch_size=256):
    """An input function for prediction.

    Args:
        features: Mapping of feature name to values; it is passed through
            ``dict()`` before being sliced into examples.
        batch_size: Number of examples per batch.

    Returns:
        A batched ``tf.data.Dataset`` of feature dictionaries (no labels).
    """
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)
predictions = classifier.predict(
    input_fn=lambda: input_fn(predict_x))

# The predict method returns a Python iterable, yielding a dictionary of prediction results for each example.
# The following code prints a few predictions and their probabilities:
for pred_dict, expec in zip(predictions, expected):
    # 'class_ids' holds the predicted class id in its first element; that id
    # indexes both the per-class probabilities and the SPECIES names.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%), expected "{}"'.format(
        SPECIES[class_id], 100 * probability, expec))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment