Last active
February 18, 2019 17:08
-
-
Save JustinaPetr/2aa91fec065d7cff15bc6e89878a2c8c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rasa_nlu.components import Component | |
from rasa_nlu import utils | |
from rasa_nlu.model import Metadata | |
from sentiment_classifier import SentimentClassifier | |
import nltk | |
from nltk.classify import NaiveBayesClassifier | |
from nltk.corpus import twitter_samples | |
from nltk.tokenize import RegexpTokenizer | |
from nltk.corpus import stopwords | |
from nltk.stem.wordnet import WordNetLemmatizer | |
import os | |
# File name used by `persist`/`load` for the pickled sentiment classifier
# inside the Rasa model directory.
SENTIMENT_MODEL_FILE_NAME = "sentiment_classifier.pkl"
class SentimentAnalyzer(Component):
    """Custom Rasa NLU pipeline component that attaches a sentiment entity.

    Training delegates to ``SentimentClassifier`` (defined in
    ``sentiment_classifier.py``); at inference time the message tokens are
    preprocessed (stop-word removal, lowercasing, lemmatization), classified,
    and the predicted label plus its probability are appended to the
    message's ``entities``.
    """

    # Name of the component to be used when integrating it in a
    # pipeline. E.g. ``[ComponentA, ComponentB]``
    # will be a proper pipeline definition where ``ComponentA``
    # is the name of the first component of the pipeline.
    name = "sentiment"

    # Defines what attributes the pipeline component will
    # provide when called. The listed attributes
    # should be set by the component on the message object
    # during test and train, e.g.
    # ```message.set("entities", [...])```
    provides = ["entities"]

    # Which attributes on a message are required by this
    # component: a previous component in the pipeline needs to have
    # "tokens" within its `provides` property.
    requires = ["tokens"]

    # Defines the default configuration parameters of a component;
    # these values can be overwritten in the pipeline configuration
    # of the model. This component needs none.
    defaults = {}

    # Defines what language(s) this component can handle.
    # None would mean it can handle all languages.
    language_list = ['en']

    def __init__(self, component_config=None):
        super(SentimentAnalyzer, self).__init__(component_config)
        # Initialize explicitly so `process` can detect an untrained
        # component instead of raising AttributeError on a missing attribute.
        self.classifier = None
        self.clf = None

    def train(self, training_data, cfg, **kwargs):
        """Train this component.

        Builds a fresh ``SentimentClassifier`` and trains it; the resulting
        NLTK classifier is kept on ``self.clf`` for use in `process`.
        The Rasa ``training_data`` and ``cfg`` arguments are not used here —
        the classifier trains on its own corpus.
        """
        self.classifier = SentimentClassifier()
        self.clf = self.classifier.train()

    def convert_to_rasa(self, value, confidence):
        """Convert the classifier output into the Rasa NLU entity format.

        :param value: predicted sentiment label
        :param confidence: probability of that label
        :return: dict in Rasa's entity schema
        """
        entity = {"value": value,
                  "confidence": confidence,
                  "entity": 'sentiment',
                  "extractor": "sentiment_extractor"}
        return entity

    def preprocessing(self, tokens):
        """Filter stop words, lowercase and lemmatize the tokens.

        :param tokens: list of token strings
        :return: ``{token: True}`` feature dict, the format NLTK's
            Naive Bayes classifier expects.
        """
        # Hoist the stop-word list into a set once: the original looked it
        # up per token, re-building the corpus list and doing O(m) list
        # membership tests on every iteration.
        stop_words = set(stopwords.words('english'))
        # One lemmatizer instance for all tokens instead of one per token.
        lemmatizer = WordNetLemmatizer()
        lemmas = [lemmatizer.lemmatize(t.lower())
                  for t in tokens
                  if t not in stop_words]
        return {word: True for word in lemmas}

    def process(self, message, **kwargs):
        """Process an incoming message.

        Classifies the message's tokens and attaches the predicted
        sentiment (with its probability) as an entity. If the component
        is untrained, no entity is attached.
        """
        if self.clf is None:
            # Component is either not trained or didn't receive enough
            # training data. The original code fell through and stored a
            # literal None in the entities list, which would break any
            # downstream consumer — skip instead.
            return
        tokens = [t.text for t in message.get("tokens")]
        features = self.preprocessing(tokens)
        prediction = self.clf.prob_classify(features)
        sentiment = prediction.max()
        confidence = prediction.prob(sentiment)
        entity = self.convert_to_rasa(sentiment, confidence)
        message.set("entities", [entity], add_to_output=True)

    def persist(self, model_dir):
        """Persist this component into the passed model directory.

        :return: metadata dict; `load` reads "classifier_file" back.
        """
        classifier_file = os.path.join(model_dir, SENTIMENT_MODEL_FILE_NAME)
        utils.pycloud_pickle(classifier_file, self)
        return {"classifier_file": SENTIMENT_MODEL_FILE_NAME}

    @classmethod
    def load(cls,
             model_dir=None,
             model_metadata=None,
             cached_component=None,
             **kwargs):
        """Load a persisted component, or build an empty one.

        Falls back to a fresh (untrained) instance when no pickled
        classifier file exists in ``model_dir``.
        """
        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("classifier_file", SENTIMENT_MODEL_FILE_NAME)
        classifier_file = os.path.join(model_dir, file_name)
        if os.path.exists(classifier_file):
            return utils.pycloud_unpickle(classifier_file)
        else:
            return cls(meta)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment