@epwalsh
Last active August 14, 2020 16:43
How to upload transformer weights and tokenizers from AllenNLP models to HuggingFace's model hub: step 1
from allennlp.common.plugins import import_plugins
from allennlp.data.tokenizers import Tokenizer, PretrainedTransformerTokenizer
from allennlp.models import load_archive
from allennlp.modules.token_embedders import PretrainedTransformerEmbedder
import os

# Change this to your serialization directory. expanduser() makes sure a
# leading "~" is resolved to your home directory.
serialization_dir = os.path.expanduser("~/my-trained-model")
# Make sure all of the classes our model and tokenizer use are registered.
import_plugins()
# Load the archive from the serialization directory, which contains both the
# trained model weights and the params used to train it.
archive = load_archive(os.path.join(serialization_dir, "model.tar.gz"))
# Pull out just the PretrainedTransformerEmbedder part of the model.
# You may need to adjust this line slightly depending on how your model is set up.
transformer_embedder: PretrainedTransformerEmbedder = \
    archive.model._source_embedder._token_embedders["tokens"]
# Now load the corresponding tokenizer.
# Again, you may need to adjust this line depending on how your config is set up.
tokenizer: PretrainedTransformerTokenizer = Tokenizer.from_params(
archive.config["dataset_reader"]["source_tokenizer"]
)
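For context, the `archive.config[...]` lookup above just pulls the nested `source_tokenizer` block out of your training config. Here is a minimal, self-contained sketch of that lookup using a plain dict with hypothetical config values (a real `archive.config` is a `Params` object, but it supports the same nested `[...]` access):

```python
# Hypothetical training-config snippet; your real config will differ.
config = {
    "dataset_reader": {
        "source_tokenizer": {
            "type": "pretrained_transformer",
            "model_name": "bert-base-uncased",
        },
    },
}

# Mirrors: archive.config["dataset_reader"]["source_tokenizer"]
tokenizer_params = config["dataset_reader"]["source_tokenizer"]
print(tokenizer_params["model_name"])  # → bert-base-uncased
```

`Tokenizer.from_params` uses the `type` key to look up the registered tokenizer class and passes the remaining keys to its constructor, which is why passing just this slice of the config reconstructs the same tokenizer used during training.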