accraze/predict.py

## predict.py
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf

# Titanic train/eval CSVs from the public tf-datasets bucket.
dftrain = pd.read_csv(
    "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
)
dfeval = pd.read_csv(
    "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
)

# Name of the export sub-directory to load. Update it with the last path
# component that the training script prints when it finishes.
MODEL = "1611359327"
savedModelPath = os.path.join("modelDir", "boostedTrees_model", MODEL)
modelPath = savedModelPath
importedModel = tf.saved_model.load(savedModelPath)

def predict(dfeval, importedModel):
    """Run the imported model's ``predict`` signature once per row of *dfeval*.

    Each row is packed into a ``tf.train.Example`` keyed by column name,
    serialized, and passed to ``importedModel.signatures["predict"]`` as a
    one-element batch.

    Columns are encoded according to their dtype: object/string columns as
    UTF-8 ``bytes_list`` entries, floating-point columns as ``float_list``
    entries and integer columns as ``int64_list`` entries. Columns of any
    other dtype (for example ``bool``) are left out of the example.

    Args:
        dfeval: ``pandas.DataFrame`` whose columns are the features to send.
        importedModel: Object exposing a ``signatures`` mapping with a
            ``"predict"`` entry that accepts an ``examples`` keyword.

    Returns:
        A list holding one signature output per row, in row order.
    """
    dtype_api = pd.api.types

    # Decide once per column how its cells are encoded. The pandas dtype
    # predicates match every integer/float width; comparing a dtype with
    # the strings "int"/"float" only matches the platform-default width and
    # silently drops every other column. Iterating ``dtypes.items()`` also
    # avoids integer positional lookups on a label-indexed Series.
    plan = []
    for position, (colName, dtype) in enumerate(dfeval.dtypes.items()):
        if dtype_api.is_object_dtype(dtype) or dtype_api.is_string_dtype(dtype):
            plan.append(
                (position, colName, "bytes_list", lambda v: bytes(v, "utf-8"))
            )
        elif dtype_api.is_float_dtype(dtype):
            plan.append((position, colName, "float_list", float))
        elif dtype_api.is_integer_dtype(dtype):
            plan.append((position, colName, "int64_list", int))

    predictions = []
    # itertuples() keeps each column's own value type, whereas iterrows()
    # coerces a row to one common dtype (ints become floats in an
    # all-numeric frame, which int64_list rejects).
    for values in dfeval.itertuples(index=False, name=None):
        example = tf.train.Example()
        for position, colName, list_name, convert in plan:
            feature = example.features.feature[colName]
            getattr(feature, list_name).value.append(convert(values[position]))

        predictions.append(
            importedModel.signatures["predict"](
                examples=tf.constant([example.SerializeToString()])
            )
        )

    return predictions

# Drop the label column so that only feature columns are serialized.
dfeval.drop(columns=["survived"], inplace=True)

predictions = predict(dfeval, importedModel)

# Predicted class for the first 10 rows. For a regression model, read
# 'predictions' instead of 'probabilities'.
newPreds = [np.argmax(output["probabilities"]) for output in predictions[:10]]
print(newPreds)

## train.py
"""
Train a Boosted Trees Classifier
"""
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf


# Titanic train/eval CSVs from the public tf-datasets bucket.
dftrain = pd.read_csv(
  "https://storage.googleapis.com/tf-datasets/titanic/train.csv")
dfeval = pd.read_csv(
  "https://storage.googleapis.com/tf-datasets/titanic/eval.csv")

LABEL = "survived"
NUMERIC_COLUMNS = ["age", "fare"]
CATEGORICAL_COLUMNS = [
  "sex", "n_siblings_spouses", "parch", "class", "deck", "embark_town",
  "alone",
]

# Categorical features first: each vocabulary is the set of distinct values
# seen in the training frame.
feature_columns = [
  tf.feature_column.categorical_column_with_vocabulary_list(
    name, dftrain[name].unique())
  for name in CATEGORICAL_COLUMNS
]
# Numeric features follow, declared as float32.
feature_columns += [
  tf.feature_column.numeric_column(name, dtype=tf.float32)
  for name in NUMERIC_COLUMNS
]


def make_train_input_fn(df, num_epochs, shuffle=True):
  """Build a pandas-backed input function that supplies features and labels.

  Args:
    df: DataFrame passed whole as the features; its ``LABEL`` column is
      additionally passed as the labels.
    num_epochs: Forwarded to ``pandas_input_fn``.
    shuffle: Forwarded to ``pandas_input_fn``.

  Returns:
    The result of ``tf.compat.v1.estimator.inputs.pandas_input_fn`` called
    with a batch size of 128 and a queue capacity of 1000.
  """
  build_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn
  options = dict(
    batch_size=128,
    num_epochs=num_epochs,
    shuffle=shuffle,
    queue_capacity=1000,
  )
  return build_input_fn(x=df, y=df[LABEL], **options)

def make_prediction_input_fn(df):
  """Build a pandas-backed input function that supplies features only.

  Args:
    df: DataFrame passed whole as the features; no labels are supplied.

  Returns:
    The result of ``tf.compat.v1.estimator.inputs.pandas_input_fn`` called
    with shuffling off, a batch size of 128 and a queue capacity of 1000.
  """
  build_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn
  options = dict(batch_size=128, shuffle=False, queue_capacity=1000)
  return build_input_fn(x=df, y=None, **options)

# Instantiate the pre-made estimator.
# Alternative: model = tf.estimator.LinearClassifier(feature_columns)
model = tf.estimator.BoostedTreesClassifier(
  feature_columns, n_batches_per_layer=1)

# Train the model, then print the evaluation metrics for the eval frame.
model.train(make_train_input_fn(dftrain, num_epochs=50))
res = model.evaluate(make_train_input_fn(dfeval, num_epochs=1, shuffle=False))
print(pd.Series(res))

# Look at the predicted class of the first 10 eval rows.
predDicts = list(model.predict(make_prediction_input_fn(dfeval)))
preds = [np.argmax(output["probabilities"]) for output in predDicts[:10]]
print(preds)

# Export the trained model. The serving input receiver parses serialized
# examples using the parse spec derived from the feature columns.
OUTDIR = "modelDir"
modelBasePath = os.path.join(OUTDIR, "boostedTrees_model")
inputFn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
  tf.feature_column.make_parse_example_spec(feature_columns))
shutil.rmtree(OUTDIR, ignore_errors=True)  # start fresh each time
modelPath = model.export_saved_model(modelBasePath, inputFn)
# The printed path is what the prediction script's MODEL setting refers to.
print(modelPath)
	import os
	import shutil
	import numpy as np
	import pandas as pd
	import tensorflow as tf

	# Titanic train/eval CSVs from the public tf-datasets bucket.
	dftrain = pd.read_csv(
		"https://storage.googleapis.com/tf-datasets/titanic/train.csv"
	)
	dfeval = pd.read_csv(
		"https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
	)

	# Name of the export sub-directory to load. Update it with the last path
	# component that the training script prints when it finishes.
	MODEL = "1611359327"
	savedModelPath = os.path.join("modelDir", "boostedTrees_model", MODEL)
	modelPath = savedModelPath
	importedModel = tf.saved_model.load(savedModelPath)

	def predict(dfeval, importedModel):
		"""Run the imported model's ``predict`` signature once per row of *dfeval*.

		Each row is packed into a ``tf.train.Example`` keyed by column name,
		serialized, and passed to ``importedModel.signatures["predict"]`` as a
		one-element batch.

		Columns are encoded according to their dtype: object/string columns as
		UTF-8 ``bytes_list`` entries, floating-point columns as ``float_list``
		entries and integer columns as ``int64_list`` entries. Columns of any
		other dtype (for example ``bool``) are left out of the example.

		Args:
			dfeval: ``pandas.DataFrame`` whose columns are the features to send.
			importedModel: Object exposing a ``signatures`` mapping with a
				``"predict"`` entry that accepts an ``examples`` keyword.

		Returns:
			A list holding one signature output per row, in row order.
		"""
		dtype_api = pd.api.types

		# Decide once per column how its cells are encoded. The pandas dtype
		# predicates match every integer/float width; comparing a dtype with
		# the strings "int"/"float" only matches the platform-default width and
		# silently drops every other column. Iterating ``dtypes.items()`` also
		# avoids integer positional lookups on a label-indexed Series.
		plan = []
		for position, (colName, dtype) in enumerate(dfeval.dtypes.items()):
			if dtype_api.is_object_dtype(dtype) or dtype_api.is_string_dtype(dtype):
				plan.append(
					(position, colName, "bytes_list", lambda v: bytes(v, "utf-8"))
				)
			elif dtype_api.is_float_dtype(dtype):
				plan.append((position, colName, "float_list", float))
			elif dtype_api.is_integer_dtype(dtype):
				plan.append((position, colName, "int64_list", int))

		predictions = []
		# itertuples() keeps each column's own value type, whereas iterrows()
		# coerces a row to one common dtype (ints become floats in an
		# all-numeric frame, which int64_list rejects).
		for values in dfeval.itertuples(index=False, name=None):
			example = tf.train.Example()
			for position, colName, list_name, convert in plan:
				feature = example.features.feature[colName]
				getattr(feature, list_name).value.append(convert(values[position]))

			predictions.append(
				importedModel.signatures["predict"](
					examples=tf.constant([example.SerializeToString()])
				)
			)

		return predictions

	# Drop the label column so that only feature columns are serialized.
	dfeval.drop(columns=["survived"], inplace=True)

	predictions = predict(dfeval, importedModel)

	# Predicted class for the first 10 rows. For a regression model, read
	# 'predictions' instead of 'probabilities'.
	newPreds = [np.argmax(output["probabilities"]) for output in predictions[:10]]
	print(newPreds)
	"""
	Train a Boosted Trees Classifier
	"""
	import os
	import shutil
	import numpy as np
	import pandas as pd
	import tensorflow as tf


	# Titanic train/eval CSVs from the public tf-datasets bucket.
	dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
	dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')

	LABEL = "survived"
	NUMERIC_COLUMNS = ['age', 'fare']
	CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']

	# Categorical features first: each vocabulary is the set of distinct
	# values seen in the training frame. Numeric features follow as float32.
	feature_columns = []
	for feature_name in CATEGORICAL_COLUMNS:
		vocabulary = dftrain[feature_name].unique()
		feature_columns.append(
			tf.feature_column.categorical_column_with_vocabulary_list(
				feature_name, vocabulary))

	for feature_name in NUMERIC_COLUMNS:
		feature_columns.append(
			tf.feature_column.numeric_column(
				feature_name, dtype=tf.float32))


	def make_train_input_fn(df, num_epochs, shuffle=True):
		"""Build a pandas-backed input function that supplies features and labels.

		Args:
			df: DataFrame passed whole as the features; its ``LABEL`` column is
				additionally passed as the labels.
			num_epochs: Forwarded to ``pandas_input_fn``.
			shuffle: Forwarded to ``pandas_input_fn``.

		Returns:
			The result of ``tf.compat.v1.estimator.inputs.pandas_input_fn``
			called with a batch size of 128 and a queue capacity of 1000.
		"""
		return tf.compat.v1.estimator.inputs.pandas_input_fn(
			x=df,
			y=df[LABEL],
			batch_size=128,
			num_epochs=num_epochs,
			shuffle=shuffle,
			queue_capacity=1000,
		)

	def make_prediction_input_fn(df):
		"""Build a pandas-backed input function that supplies features only.

		Args:
			df: DataFrame passed whole as the features; no labels are supplied.

		Returns:
			The result of ``tf.compat.v1.estimator.inputs.pandas_input_fn``
			called with shuffling off, a batch size of 128 and a queue
			capacity of 1000.
		"""
		return tf.compat.v1.estimator.inputs.pandas_input_fn(
			x=df,
			y=None,
			batch_size=128,
			shuffle=False,
			queue_capacity=1000,
		)

	# Instantiate the pre-made estimator.
	# Alternative: model = tf.estimator.LinearClassifier(feature_columns)
	model = tf.estimator.BoostedTreesClassifier(feature_columns, n_batches_per_layer=1)

	# Train the model, then print the evaluation metrics for the eval frame.
	model.train(make_train_input_fn(dftrain, num_epochs=50))
	res = model.evaluate(make_train_input_fn(dfeval, num_epochs=1, shuffle=False))
	print(pd.Series(res))

	predDicts = list(model.predict(make_prediction_input_fn(dfeval)))
	preds = []
	for pred in predDicts[:10]:
		preds.append(np.argmax(pred["probabilities"]))

	# look at first 10 predictions
	print(preds)

	# Export the trained model. The serving input receiver parses serialized
	# examples using the parse spec derived from the feature columns.
	inputFn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
		tf.feature_column.make_parse_example_spec(feature_columns))
	OUTDIR = 'modelDir'
	shutil.rmtree(OUTDIR, ignore_errors=True)  # start fresh each time
	modelBasePath = os.path.join(OUTDIR, "boostedTrees_model")
	modelPath = model.export_saved_model(modelBasePath, inputFn)
	print(modelPath)