georgwiese/universal_sentence_encoder_featurizer.py

## universal_sentence_encoder_featurizer.py
""" Enhancing Intent Classification with the Universal Sentence Encoder:
https://medium.com/scalableminds/enhancing-intent-classification-with-the-universal-sentence-encoder-ecbcd7a3005c
"""

from rasa_nlu.featurizers import Featurizer

import tensorflow_hub as hub
import tensorflow as tf


class UniversalSentenceEncoderFeaturizer(Featurizer):
    """Appends a universal sentence encoding to the message's text_features."""

    # URL of the TensorFlow Hub Module
    TFHUB_URL = "https://tfhub.dev/google/universal-sentence-encoder/2"

    name = "universal_sentence_encoder_featurizer"

    # We don't require any previous pipline step and return text_features
    requires = []
    provides = ["text_features"]


    def __init__(self, component_config):

        super(UniversalSentenceEncoderFeaturizer, self).__init__(component_config)

        # Load the TensorFlow Hub Module with pre-trained weights
        sentence_encoder = hub.Module(self.TFHUB_URL)
        # Create a TensorFlow placeholder for the input string
        self.input_string = tf.placeholder(tf.string, shape=[None])
        # Invoke `sentence_encoder` in order to create the encoding tensor
        self.encoding = sentence_encoder(self.input_string)

        # Create a TensorFlow Session and run initializers
        self.session = tf.Session()
        self.session.run([tf.global_variables_initializer(),
                          tf.tables_initializer()])


    def train(self, training_data, config, **kwargs):

        # Nothing to train, just process all training examples so that the
        # feature is set for future pipeline steps
        for example in training_data.training_examples:
            self.process(example)


    def process(self, message, **kwargs):

        # Get the sentence encoding by feeding the message text and computing
        # the encoding tensor.
        feature_vector = self.session.run(self.encoding,
                                          {self.input_string: [message.text]})[0]
        # Concatenate the feature vector with any existing text features
        features = self._combine_with_existing_text_features(message, feature_vector)
        # Set the feature, overwriting any existing `text_features`
        message.set("text_features", features)
	""" Enhancing Intent Classification with the Universal Sentence Encoder:
	https://medium.com/scalableminds/enhancing-intent-classification-with-the-universal-sentence-encoder-ecbcd7a3005c
	"""

	from rasa_nlu.featurizers import Featurizer

	import tensorflow_hub as hub
	import tensorflow as tf


	class UniversalSentenceEncoderFeaturizer(Featurizer):
	"""Appends a universal sentence encoding to the message's text_features."""

	# URL of the TensorFlow Hub Module
	TFHUB_URL = "https://tfhub.dev/google/universal-sentence-encoder/2"

	name = "universal_sentence_encoder_featurizer"

	# We don't require any previous pipline step and return text_features
	requires = []
	provides = ["text_features"]


	def __init__(self, component_config):

	super(UniversalSentenceEncoderFeaturizer, self).__init__(component_config)

	# Load the TensorFlow Hub Module with pre-trained weights
	sentence_encoder = hub.Module(self.TFHUB_URL)
	# Create a TensorFlow placeholder for the input string
	self.input_string = tf.placeholder(tf.string, shape=[None])
	# Invoke `sentence_encoder` in order to create the encoding tensor
	self.encoding = sentence_encoder(self.input_string)

	# Create a TensorFlow Session and run initializers
	self.session = tf.Session()
	self.session.run([tf.global_variables_initializer(),
	tf.tables_initializer()])


	def train(self, training_data, config, **kwargs):

	# Nothing to train, just process all training examples so that the
	# feature is set for future pipeline steps
	for example in training_data.training_examples:
	self.process(example)


	def process(self, message, **kwargs):

	# Get the sentence encoding by feeding the message text and computing
	# the encoding tensor.
	feature_vector = self.session.run(self.encoding,
	{self.input_string: [message.text]})[0]
	# Concatenate the feature vector with any existing text features
	features = self._combine_with_existing_text_features(message, feature_vector)
	# Set the feature, overwriting any existing `text_features`
	message.set("text_features", features)