Tiago Duque (Sirsirious)

import os

# Saving the Keras model yields a .pb file in the defined path: /content/saved_model/saved_model.pb
model_file = os.path.join("/content/", "saved_model")
keras_model.save(model_file)
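# --- Added sketch (not in the original gist): a quick round-trip check of the
# export. Assumes the model_file path above and the tokenized example_input from
# the verification snippet further down; for Keras-exported SavedModels, the
# object returned by tf.saved_model.load is callable.
import numpy
import tensorflow as tf
restored = tf.saved_model.load(model_file)
# Calling it on the same tokenized batch should reproduce the activations printed earlier.
restored_activations = restored(example_input[None, :])
print(numpy.asarray(restored_activations))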
import tensorflow as tf

# Create a full Keras model using the layer you loaded from trax.
inputs = tf.keras.Input(shape=(None,), dtype='int32')
# Use standard Keras functional syntax to link the input to the layer
hidden = keras_layer(inputs)
# The outputs of the trax-loaded layer become the model outputs
outputs = hidden
# Finally, wrap everything with a Keras Model
keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
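# --- Added sketch (not in the original gist): inspecting the wrapped model.
# summary() shows the single int32 input feeding the trax-converted layer.
keras_model.summary()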
import trax

# To convert the trax model to a Keras layer, simply run:
keras_layer = trax.AsKeras(sentiment_analysis_model)
# This will be a trax.trax2keras.AsKeras object
print(keras_layer)
import numpy
from trax import data

# Run the Keras layer to verify it returns the same result as the trax model.
example_input = list(data.tokenize(iter(["I loved the way that the actors were cast, also, It is clear that they've put a huge effort in post-production."]), vocab_file="en_8k.subword"))[0]
# Add a fake batch dimension before calling the layer.
sentiment_activations = keras_layer(example_input[None, :])
print(f'Keras returned sentiment activations: {numpy.asarray(sentiment_activations)}')
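# --- Added sketch (not in the original gist): making the "same result" check
# explicit. Assumes sentiment_analysis_model (the trained trax model from
# earlier in the tutorial) is still available.
trax_activations = sentiment_analysis_model(example_input[None, :])
print(f'trax returned sentiment activations: {numpy.asarray(trax_activations)}')
assert numpy.allclose(numpy.asarray(sentiment_activations),
                      numpy.asarray(trax_activations), atol=1e-5)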
from trax.supervised import training

# Switch the backend to tensorflow-numpy before rebuilding the loop:
trax.fastmath.set_backend("tensorflow-numpy")
# With the backend as tensorflow-numpy, rebuild the training loop:
training_loop = training.Loop(sentiment_analysis_model,
                              train_task,
                              eval_tasks=[eval_task],
                              output_dir=output_dir)
# Run 1 step to adapt the weights to the new backend
training_loop.run(1)
from trax import layers as tl

# First, we need the same structure:
new_model = tl.Serial(
    tl.Embedding(data.vocab_size(vocab_file='en_8k.subword'), d_feature=256),
    tl.Mean(axis=1),
    tl.Dense(2),
    tl.LogSoftmax()
)
# Then, we load the weights:
new_model.init_from_file(file_name="/root/output_dir/model.pkl.gz", weights_only=True)  # Only load weights, not the full training state
# Same result as before (I used a helper function for simplicity)
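# --- Added sketch (not in the original gist): an inline version of that helper
# check, since the helper function isn't shown in this excerpt. The review text
# is illustrative; imdb_reviews uses label 0 = negative, 1 = positive.
import numpy
tokens = list(data.tokenize(iter(["I loved this movie!"]), vocab_file='en_8k.subword'))[0]
log_probs = numpy.asarray(new_model(tokens[None, :]))
print('positive' if log_probs[0].argmax() == 1 else 'negative')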
# This loads a checkpoint:
training_loop.load_checkpoint(directory='~/output_dir/', filename="model.pkl.gz")
# Continue training:
training_loop.run(200)
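# --- Added sketch (not in the original gist): confirming the resume worked.
# Loop.step reports the current step counter, so it should continue from the
# checkpointed step rather than from 0.
print(f'Now at step {training_loop.step}')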
import numpy

example_input = "I loved the way that the actors were cast, also, It is clear that they've put a huge effort in post-production."
#example_input = "Try your movie review here!"
# Steps explained:
# 1st: tokenize the input. We cast it to an iterator to fake a generator.
input_iter = iter([example_input])
input_tokens = data.tokenize(input_iter, vocab_file='en_8k.subword')
# 2nd: cast the results to a list and get the first value (the tokens, not a label or anything else)
tokenized_input = list(input_tokens)[0]
# 3rd: add a fake batch dimension
tokenized_input = tokenized_input[None, :]
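# --- Added sketch (not in the original gist): sanity-checking the token IDs by
# detokenizing them back to text with the same vocab file.
print(data.detokenize(tokenized_input[0], vocab_file='en_8k.subword'))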
from trax.supervised import training
import os

# Training task.
train_task = training.TrainTask(
    labeled_data=train_batches_stream,
    loss_layer=tl.CrossEntropyLoss(),
    optimizer=trax.optimizers.Adam(0.01),
    n_steps_per_checkpoint=200,  # Checkpoint and print evaluation results every 200 training steps.
)
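# --- Added sketch (not in the original gist): the eval_task referenced by the
# training loop above isn't defined in this excerpt. A minimal version, assuming
# an eval_batches_stream built the same way as train_batches_stream:
eval_task = training.EvalTask(
    labeled_data=eval_batches_stream,
    metrics=[tl.CrossEntropyLoss(), tl.Accuracy()],
)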
# First we get the streams from TFDS
train_stream = trax.data.TFDS('imdb_reviews', keys=('text', 'label'), train=True)()
eval_stream = trax.data.TFDS('imdb_reviews', keys=('text', 'label'), train=False)()
# Next, we build the pipeline
data_pipeline = trax.data.Serial(
    trax.data.Tokenize(vocab_file='en_8k.subword', keys=[0]),
    trax.data.Shuffle(),
    trax.data.FilterByLength(max_length=2048, length_keys=[0]),
    trax.data.BucketByLength(boundaries=[32, 128, 512, 2048],