# patrickvossler18/DNNClassifier_Iris_example_tf_2_0.py

## DNNClassifier_Iris_example_tf_2_0.py
# Example taken from https://www.tensorflow.org/tutorials/estimator/premade

import tensorflow as tf
import numpy as np
import pandas as pd

# Paths of the Iris training and test CSV files read by this script.
IRIS_TRAINING = "~/Downloads/iris_training.csv"
IRIS_TEST = "~/Downloads/iris_test.csv"

# Column names applied to both CSV files; the last one is the label column.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]
# Species names, looked up by predicted class id when reporting predictions.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]


# Load the training data. header=0 marks the first row of the file as a header
# row, and names= replaces whatever that row contains with CSV_COLUMN_NAMES.
training_set = pd.read_csv(
    IRIS_TRAINING,
    header=0,
    names=CSV_COLUMN_NAMES,
)
# pop() removes the label column from the frame and returns it, leaving only
# the four feature columns behind.
train_y = training_set.pop('Species')

# Same treatment for the test data.
test_set = pd.read_csv(
    IRIS_TEST,
    header=0,
    names=CSV_COLUMN_NAMES,
)
test_y = test_set.pop('Species')

# The label column has now been removed from the features.
# The preview returned by head() is not stored or printed here.
training_set.head()


# Now that the tf.contrib module has been removed from newer versions of TensorFlow, the method for training our model is a bit more complicated.
# Because we already have the data set up, we can define a model using a TensorFlow Estimator.
# An Estimator is any class derived from tf.estimator.Estimator.
# TensorFlow provides a collection of pre-made Estimators in tf.estimator (for example, LinearRegressor) that implement common ML algorithms.

# To write a TensorFlow program based on pre-made Estimators, you must perform the following tasks:
#
# - Create one or more input functions.
# - Define the model's feature columns.
# - Instantiate an Estimator, specifying the feature columns and various hyperparameters.
# - Call one or more methods on the Estimator object, passing the appropriate input function as the source of the data.
#
# Let's see how those tasks are implemented for Iris classification.

# CREATE INPUT FUNCTIONS

# You must create input functions to supply data for training, evaluating, and prediction.
#
# An input function is a function that returns a tf.data.Dataset object which outputs the following two-element tuple:
#
# - features: a Python dictionary in which
#     - each key is the name of a feature, and
#     - each value is an array containing all of that feature's values.
# - label: an array containing the values of the label for every example.
#
# Just to demonstrate the format of the input function, here's a simple implementation:

def input_evaluation_set():
    """Return a small hand-written ``(features, labels)`` pair.

    This only illustrates the structure an input function works with; nothing
    else in this script calls it.

    Returns:
        A two-element tuple ``(features, labels)``. ``features`` is a dict
        mapping each of the four feature names to a NumPy array holding two
        values, and ``labels`` is a NumPy array holding the two matching
        label values.
    """
    labels = np.array([2, 1])
    features = dict(
        SepalLength=np.array([6.4, 5.0]),
        SepalWidth=np.array([2.8, 2.3]),
        PetalLength=np.array([5.6, 3.3]),
        PetalWidth=np.array([2.2, 1.0]),
    )
    return features, labels


def input_fn(features, labels, training=True, batch_size=256):
    """Build the batched dataset used for training or evaluation.

    Args:
        features: Collection of feature columns; it is converted with
            ``dict(features)`` before being sliced into examples.
        labels: Label values, sliced alongside the features.
        training: When true, the examples are shuffled (buffer size 1000)
            and repeated before batching; otherwise they are batched as-is.
        batch_size: Number of examples per batch.

    Returns:
        The batched ``tf.data.Dataset`` of ``(features, label)`` pairs.
    """
    examples = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Evaluation mode: keep the original order and make a single pass.
    if not training:
        return examples.batch(batch_size)

    # Training mode: shuffle, then repeat, then batch.
    return examples.shuffle(1000).repeat().batch(batch_size)


# DEFINE THE FEATURE COLUMNS

# A feature column is an object describing how the model should use raw input data from the features dictionary.
# When you build an Estimator model, you pass it a list of feature columns that describes each of the features you want the model to use.
# The tf.feature_column module provides many options for representing data to the model.
#
# For Iris, the 4 raw features are numeric values, so we'll build a list of feature columns to tell the Estimator model to represent each of the four features as 32-bit floating-point values.
# Therefore, the code to create the feature column is:

# Feature columns describe how to use the input: one numeric column is created
# for every key still present in training_set (the label was popped earlier).
my_feature_columns = [
    tf.feature_column.numeric_column(key=key)
    for key in training_set.keys()
]

# INSTANTIATE AN ESTIMATOR

# Build a DNN with 3 hidden layers of 10, 20, and 10 hidden nodes respectively.
# NOTE(review): the Estimator API is deprecated in recent TensorFlow 2.x
# releases; confirm the installed version still provides tf.estimator.
classifier = tf.estimator.DNNClassifier(
    # The model must choose between 3 classes.
    n_classes=3,
    hidden_units=[10, 20, 10],
    feature_columns=my_feature_columns,
)

# TRAIN, EVALUATE, PREDICT

# Train the Model.
# The lambda gives the Estimator a zero-argument callable. The name `input_fn`
# is resolved only when that callable is invoked, so this statement relies on
# the training/evaluation input_fn still being bound to that name; the name is
# redefined further down in this file for prediction.
classifier.train(
    input_fn=lambda: input_fn(training_set, train_y, training=True),
    # Stop after this many training steps.
    steps=5000)

# Note that we wrap up our input_fn call in a lambda to capture the arguments while providing an input function that
# takes no arguments, as expected by the Estimator.
# The steps argument tells the method to stop training after a number of training steps.


# EVALUATE THE TRAINED MODEL
# Now that the model has been trained, we can get some statistics on its performance.
# The following code block evaluates the accuracy of the trained model on the test data:

# Evaluate on the test data. training=False makes input_fn skip the shuffle and
# repeat steps, so the test examples are batched as-is. As with training, the
# lambda looks up `input_fn` when it is invoked.
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test_set, test_y, training=False))

# eval_result is unpacked into keyword arguments for format(), so it must
# provide an 'accuracy' entry; that value is printed with three decimals.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))


# You now have a trained model that produces good evaluation results.
# You can now use the trained model to predict the species of an Iris flower based on some unlabeled measurements.
# As with training and evaluation, you make predictions using a single function call:

# Generate predictions from the model
# NOTE: These test points are different from those used in lecture.
# To make sure you understand what's going on, try adding the test points used in lecture

# Species anticipated for each of the three examples in predict_x, in order.
expected = [
    'Setosa',
    'Versicolor',
    'Virginica',
]
# Unlabeled measurements to classify: each feature name maps to a list holding
# one value per example.
predict_x = dict(
    SepalLength=[5.1, 5.9, 6.9],
    SepalWidth=[3.3, 3.0, 3.1],
    PetalLength=[1.7, 4.2, 5.4],
    PetalWidth=[0.5, 1.5, 2.1],
)

def input_fn(features, batch_size=256):
    """Build the batched, label-free dataset used for prediction.

    Note that this definition rebinds the module-level name ``input_fn``,
    which earlier in this file refers to the training/evaluation input
    function with a different signature.

    Args:
        features: Collection of feature columns; it is converted with
            ``dict(features)`` before being sliced into examples.
        batch_size: Number of examples per batch.

    Returns:
        The batched ``tf.data.Dataset`` of feature dictionaries.
    """
    # Convert the inputs to a Dataset without labels.
    feature_map = dict(features)
    unlabeled = tf.data.Dataset.from_tensor_slices(feature_map)
    return unlabeled.batch(batch_size)

# The lambda supplies predict() with a zero-argument callable; at this point
# `input_fn` refers to the prediction variant defined just above.
predictions = classifier.predict(
    input_fn=lambda: input_fn(predict_x))

# The predict method returns a Python iterable, yielding a dictionary of prediction results for each example.
# The following code prints a few predictions and their probabilities:

# zip() pairs each prediction with the species expected for that example and
# stops at the shorter of the two sequences.
for pred_dict, expec in zip(predictions, expected):
    # The first entry of 'class_ids' is used as the predicted class index.
    class_id = pred_dict['class_ids'][0]
    # Probability the model assigned to that predicted class.
    probability = pred_dict['probabilities'][class_id]

    # Report the species name, its probability as a percentage, and the
    # expected species.
    print('Prediction is "{}" ({:.1f}%), expected "{}"'.format(
        SPECIES[class_id], 100 * probability, expec))
	# Example taken from https://www.tensorflow.org/tutorials/estimator/premade
# NOTE(review): from here on the file repeats the script above statement for
# statement, so running the file end to end performs the load / train /
# evaluate / predict sequence a second time. This copy had lost its block
# structure (function, loop and call bodies were not indented under their
# headers); the structure is restored below and the statements are unchanged.

import tensorflow as tf
import numpy as np
import pandas as pd

# Paths of the Iris training and test CSV files read by this script.
IRIS_TRAINING = "~/Downloads/iris_training.csv"
IRIS_TEST = "~/Downloads/iris_test.csv"

# Column names applied to both CSV files; 'Species' is the label column.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
# Species names, looked up by predicted class id when reporting predictions.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


training_set = pd.read_csv(IRIS_TRAINING, names=CSV_COLUMN_NAMES, header=0)
test_set = pd.read_csv(IRIS_TEST, names=CSV_COLUMN_NAMES, header=0)

# pop() removes the label column from each frame and returns it.
train_y = training_set.pop('Species')
test_y = test_set.pop('Species')

# The label column has now been removed from the features.
training_set.head()


# Now that the tf.contrib class has been removed for newer versions of tensorflow, the method for training our model is a bit more complicated
# Because we already have the data set up, we can define a model using a TensorFlow Estimator.
# An Estimator is any class derived from tf.estimator.Estimator.
# TensorFlow provides a collection of tf.estimator (for example, LinearRegressor) to implement common ML algorithms.

# To write a TensorFlow program based on pre-made Estimators, you must perform the following tasks:
#
# - Create one or more input functions.
# - Define the model's feature columns.
# - Instantiate an Estimator, specifying the feature columns and various hyperparameters.
# - Call one or more methods on the Estimator object, passing the appropriate input function as the source of the data.
# - Let's see how those tasks are implemented for Iris classification.

# CREATE INPUT FUNCTIONS

# You must create input functions to supply data for training, evaluating, and prediction.
#
# An input function is a function that returns a tf.data.Dataset object which outputs the following two-element tuple:
#
# features - A Python dictionary in which:
# Each key is the name of a feature.
# Each value is an array containing all of that feature's values.
# label - An array containing the values of the label for every example.
# Just to demonstrate the format of the input function, here's a simple implementation:

def input_evaluation_set():
    """Return a small hand-written ``(features, labels)`` pair.

    Returns:
        A two-element tuple: a dict mapping each of the four feature names to
        a NumPy array of two values, and a NumPy array of the two labels.
    """
    features = {'SepalLength': np.array([6.4, 5.0]),
                'SepalWidth': np.array([2.8, 2.3]),
                'PetalLength': np.array([5.6, 3.3]),
                'PetalWidth': np.array([2.2, 1.0])}
    labels = np.array([2, 1])
    return features, labels


def input_fn(features, labels, training=True, batch_size=256):
    """An input function for training or evaluating.

    Args:
        features: Collection of feature columns; converted with ``dict()``.
        labels: Label values, sliced alongside the features.
        training: When true, shuffle (buffer size 1000) and repeat.
        batch_size: Number of examples per batch.

    Returns:
        The batched ``tf.data.Dataset`` of ``(features, label)`` pairs.
    """
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Shuffle and repeat if you are in training mode.
    if training:
        dataset = dataset.shuffle(1000).repeat()
    return dataset.batch(batch_size)


# DEFINE THE FEATURE COLUMNS

# A feature column is an object describing how the model should use raw input data from the features dictionary.
# When you build an Estimator model, you pass it a list of feature columns that describes each of the features you want the model to use.
# The tf.feature_column module provides many options for representing data to the model.
#
# For Iris, the 4 raw features are numeric values, so we'll build a list of feature columns to tell the Estimator model to represent each of the four features as 32-bit floating-point values.
# Therefore, the code to create the feature column is:

# Feature columns describe how to use the input.
my_feature_columns = []
for key in training_set.keys():
    my_feature_columns.append(tf.feature_column.numeric_column(key=key))

# INSTANTIATE AN ESTIMATOR

# Build a DNN with 3 hidden layers with 10, 20, and 10 hidden nodes each.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 20, 10],
    # The model must choose between 3 classes.
    n_classes=3)

# TRAIN, EVALUATE, PREDICT

# Train the Model.
classifier.train(
    input_fn=lambda: input_fn(training_set, train_y, training=True),
    steps=5000)

# Note that we wrap up our input_fn call in a lambda to capture the arguments while providing an input function that
# takes no arguments, as expected by the Estimator.
# The steps argument tells the method to stop training after a number of training steps.


# EVALUATE THE TRAINED MODEL
# Now that the model has been trained, we can get some statistics on its performance.
# The following code block evaluates the accuracy of the trained model on the test data:

eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(test_set, test_y, training=False))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))


#You now have a trained model that produces good evaluation results.
# You can now use the trained model to predict the species of an Iris flower based on some unlabeled measurements.
# As with training and evaluation, you make predictions using a single function call:

# Generate predictions from the model
# NOTE: These test points are different from those used in lecture.
# To make sure you understand what's going on, try adding the test points used in lecture

expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

def input_fn(features, batch_size=256):
    """An input function for prediction.

    This rebinds the name ``input_fn`` used above for training/evaluation.

    Args:
        features: Collection of feature columns; converted with ``dict()``.
        batch_size: Number of examples per batch.

    Returns:
        The batched ``tf.data.Dataset`` of feature dictionaries.
    """
    # Convert the inputs to a Dataset without labels.
    return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

predictions = classifier.predict(
    input_fn=lambda: input_fn(predict_x))

# The predict method returns a Python iterable, yielding a dictionary of prediction results for each example.
# The following code prints a few predictions and their probabilities:

for pred_dict, expec in zip(predictions, expected):
    # The first entry of 'class_ids' is used as the predicted class index.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%), expected "{}"'.format(
        SPECIES[class_id], 100 * probability, expec))