Skip to content

Instantly share code, notes, and snippets.

@accraze
Created January 23, 2021 00:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save accraze/8fd30ba56bfd688ff9e5506976b86901 to your computer and use it in GitHub Desktop.
Save accraze/8fd30ba56bfd688ff9e5506976b86901 to your computer and use it in GitHub Desktop.
Boosted Trees test model
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
# Titanic train/eval splits hosted alongside the TensorFlow tutorials.
dftrain = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
)
dfeval = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'
)

# Name of the export sub-directory to load; set this to the last path
# component printed by the training script.
MODEL = '1611359327'

# Both names refer to the same SavedModel directory.
savedModelPath = modelPath = os.path.join(
    'modelDir', 'boostedTrees_model', MODEL
)
importedModel = tf.saved_model.load(savedModelPath)
def predict(dfeval, importedModel):
    """Run the model's ``"predict"`` signature once per row of ``dfeval``.

    Every row is packed into a ``tf.train.Example`` keyed by column name and
    sent, serialized, to ``importedModel.signatures["predict"]``.

    Column handling is decided by dtype:

    * object columns   -> ``bytes_list`` (UTF-8 encoded text)
    * float columns    -> ``float_list`` (any float width)
    * integer columns  -> ``int64_list`` (any integer width)
    * any other dtype  -> left out of the Example

    Args:
        dfeval: pandas DataFrame holding one example per row.
        importedModel: loaded model object exposing ``signatures["predict"]``
            that accepts an ``examples`` tensor of serialized Examples.

    Returns:
        A list with one entry per row, each being whatever the signature
        call returned for that row.
    """
    is_scalar = pd.api.types.is_scalar

    # Classify each column once up front instead of once per cell. The list
    # keeps an entry for every column (kind=None for unsupported dtypes) so
    # it stays aligned with the positional tuples from itertuples().
    column_kinds = []
    for colName, dtype in dfeval.dtypes.items():
        if pd.api.types.is_object_dtype(dtype):
            kind = "bytes"
        elif pd.api.types.is_float_dtype(dtype):
            kind = "float"
        elif pd.api.types.is_integer_dtype(dtype):
            kind = "int"
        else:
            kind = None
        column_kinds.append((colName, kind))

    predict_fn = importedModel.signatures["predict"]
    predictions = []

    # itertuples() keeps each column's own type; iterrows() would coerce a
    # whole row to one dtype and can turn integers into floats.
    for row in dfeval.itertuples(index=False, name=None):
        example = tf.train.Example()
        features = example.features.feature
        for (colName, kind), value in zip(column_kinds, row):
            if kind == "bytes":
                # A missing cell (None/NaN) cannot be encoded as text; leave
                # the feature absent rather than raising.
                if is_scalar(value) and pd.isna(value):
                    continue
                features[colName].bytes_list.value.append(
                    str(value).encode("utf-8"))
            elif kind == "float":
                features[colName].float_list.value.append(float(value))
            elif kind == "int":
                features[colName].int64_list.value.append(int(value))
        predictions.append(
            predict_fn(examples=tf.constant([example.SerializeToString()]))
        )
    return predictions
# The label is not a model input, so remove it before building Examples.
dfeval = dfeval.drop(columns=["survived"])
predictions = predict(dfeval, importedModel)

# Class index with the highest probability for the first ten rows.
# For a regression model, read the 'predictions' key instead of
# 'probabilities'.
newPreds = [np.argmax(pred["probabilities"]) for pred in predictions[:10]]
print(newPreds)
"""
Train a Boosted Trees classifier on the Titanic dataset, evaluate it,
and export it as a SavedModel.
"""
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
# Titanic train/eval splits hosted alongside the TensorFlow tutorials.
dftrain = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
)
dfeval = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'
)

# Column holding the target, and the feature columns grouped by how they
# are encoded for the model.
LABEL = "survived"
NUMERIC_COLUMNS = ['age', 'fare']
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']

# Categorical features come first, each with a vocabulary made of the
# distinct values seen in the training data.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        name, dftrain[name].unique())
    for name in CATEGORICAL_COLUMNS
]
# Numeric features follow, all declared as float32.
feature_columns.extend(
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in NUMERIC_COLUMNS
)
def make_train_input_fn(df, num_epochs, shuffle=True, batch_size=128):
    """Build a pandas-backed input function that yields features and labels.

    The whole frame is passed as ``x`` and ``df[LABEL]`` as ``y``, so the
    label column is also present among the features handed to the model.

    Args:
        df: pandas DataFrame containing the feature columns and ``LABEL``.
        num_epochs: forwarded to ``pandas_input_fn`` as ``num_epochs``.
        shuffle: forwarded to ``pandas_input_fn`` as ``shuffle``.
        batch_size: rows per batch; defaults to 128, the value that used to
            be hard-coded here.

    Returns:
        The input function produced by
        ``tf.compat.v1.estimator.inputs.pandas_input_fn``.
    """
    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=df,
        y=df[LABEL],
        batch_size=batch_size,
        num_epochs=num_epochs,
        shuffle=shuffle,
        queue_capacity=1000,
    )
def make_prediction_input_fn(df, batch_size=128):
    """Build a pandas-backed input function that yields features only.

    No labels are supplied (``y=None``) and shuffling is disabled, so rows
    are emitted in the frame's order.

    Args:
        df: pandas DataFrame of examples to predict on.
        batch_size: rows per batch; defaults to 128, the value that used to
            be hard-coded here.

    Returns:
        The input function produced by
        ``tf.compat.v1.estimator.inputs.pandas_input_fn``.
    """
    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=df,
        y=None,
        batch_size=batch_size,
        shuffle=False,
        queue_capacity=1000,
    )
# Instantiate the pre-made boosted-trees estimator.
# (tf.estimator.LinearClassifier(feature_columns) can be swapped in here
# to try a linear model instead.)
model = tf.estimator.BoostedTreesClassifier(
    feature_columns,
    n_batches_per_layer=1,
)

# Train the model, then score it on the evaluation split.
model.train(make_train_input_fn(dftrain, num_epochs=50))
res = model.evaluate(
    make_train_input_fn(dfeval, num_epochs=1, shuffle=False)
)
print(pd.Series(res))

# Look at the first 10 predictions as class indices.
predDicts = list(model.predict(make_prediction_input_fn(dfeval)))
preds = [np.argmax(pred["probabilities"]) for pred in predDicts[:10]]
print(preds)

# Serving input: the exported model parses serialized tf.Example protos
# using the spec derived from the feature columns.
parse_spec = tf.feature_column.make_parse_example_spec(feature_columns)
inputFn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    parse_spec
)

OUTDIR = 'modelDir'
# Start fresh each time: remove any previous export tree.
shutil.rmtree(OUTDIR, ignore_errors=True)
modelBasePath = os.path.join(OUTDIR, "boostedTrees_model")
modelPath = model.export_saved_model(modelBasePath, inputFn)
# The printed path is what the prediction script needs to load.
print(modelPath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment