import os
import pickle
import typing
from typing import Any, Optional, Text, Dict

import nltk
from nltk.classify import NaiveBayesClassifier

from rasa.nlu import utils
from rasa.nlu.components import Component
from rasa.nlu.model import Metadata
# File name (joined onto the model directory by ``persist``) under which the
# trained sentiment component is serialized; also reported in the metadata
# that ``persist`` returns.
SENTIMENT_MODEL_FILE_NAME = "sentiment_classifier.pkl"
class SentimentAnalyzer(Component):
    """A custom sentiment analysis component.

    Trains an NLTK ``NaiveBayesClassifier`` on bag-of-words features built
    from the tokens of the training examples and, at inference time, attaches
    the predicted label to the message as an entity named ``"sentiment"``.

    Attributes:
        clf: The trained classifier, or ``None`` while the component is
            untrained.
    """

    name = "sentiment"
    provides = ["entities"]
    requires = ["tokens"]
    defaults = {}
    language_list = ["en"]
    # Executed once, when the class body is evaluated (i.e. on import),
    # not each time an instance is created.
    print('initialised the class')

    def __init__(self, component_config=None):
        """Create an untrained component.

        Args:
            component_config: Optional configuration passed through to the
                ``Component`` base class.
        """
        super().__init__(component_config)
        # Defined up front so ``process`` can test for an untrained component
        # without raising AttributeError.
        self.clf = None

    def train(self, training_data, cfg, **kwargs):
        """Train the classifier on the tokens of the training examples.

        Sentiment labels are read from ``labels.txt`` in the current working
        directory, one label per line, and paired positionally with
        ``training_data.training_examples``.

        Args:
            training_data: Object exposing ``training_examples``, a list of
                messages whose ``"tokens"`` property holds objects with a
                ``text`` attribute.
            cfg: Unused; accepted for interface compatibility.
            **kwargs: Unused.
        """
        with open('labels.txt', 'r') as f:
            labels = f.read().splitlines()

        examples = training_data.training_examples
        features = [
            self.preprocessing([token.text for token in example.get('tokens')])
            for example in examples
        ]
        # zip() stops at the shorter sequence, so surplus examples or labels
        # are ignored rather than mis-paired.
        labeled_data = list(zip(features, labels))
        if not labeled_data:
            # Nothing to learn from: stay untrained so ``process`` is a no-op
            # instead of failing inside the classifier.
            self.clf = None
            return
        self.clf = NaiveBayesClassifier.train(labeled_data)

    def convert_to_rasa(self, value, confidence):
        """Convert model output into the Rasa NLU compatible output format.

        Args:
            value: Predicted sentiment label.
            confidence: Probability the classifier assigned to ``value``.

        Returns:
            An entity dictionary with the keys ``value``, ``confidence``,
            ``entity`` and ``extractor``.
        """
        entity = {"value": value,
                  "confidence": confidence,
                  "entity": "sentiment",
                  "extractor": "sentiment_extractor"}
        return entity

    def preprocessing(self, tokens):
        """Create a bag-of-words representation of a token sequence.

        Args:
            tokens: Iterable of token strings.

        Returns:
            A dict mapping every distinct token to ``True``.
        """
        return {word: True for word in tokens}

    def process(self, message, **kwargs):
        """Classify the message and append the sentiment to its entities.

        Does nothing when the component has not been trained. Entities that
        are already present on the message are preserved.

        Args:
            message: Message whose ``"tokens"`` property holds objects with a
                ``text`` attribute.
            **kwargs: Unused.
        """
        # getattr() also covers instances restored from files written before
        # ``clf`` was initialised in ``__init__``.
        clf = getattr(self, "clf", None)
        if clf is None:
            # Component is either not trained or didn't receive enough
            # training data.
            return

        tokens = [t.text for t in (message.get("tokens") or [])]
        pred = clf.prob_classify(self.preprocessing(tokens))

        sentiment = pred.max()
        confidence = pred.prob(sentiment)
        entity = self.convert_to_rasa(sentiment, confidence)

        # Append instead of overwrite so that entities found by earlier
        # pipeline components are not lost.
        entities = list(message.get("entities") or [])
        entities.append(entity)
        message.set("entities", entities, add_to_output=True)

    def persist(self, file_name, model_dir):
        """Persist this component into the passed directory.

        The component is written with binary ``pickle`` because serializing
        it through ``utils.json_pickle`` was reported not to restore the
        trained classifier faithfully.

        Args:
            file_name: Unused; the file is always named
                ``SENTIMENT_MODEL_FILE_NAME``.
            model_dir: Directory in which the file is created.

        Returns:
            Metadata dict holding the file name under ``"classifier_file"``.
        """
        classifier_file = os.path.join(model_dir, SENTIMENT_MODEL_FILE_NAME)
        with open(classifier_file, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
        return {"classifier_file": SENTIMENT_MODEL_FILE_NAME}

    @classmethod
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional["Metadata"] = None,
             cached_component: Optional["Component"] = None,
             **kwargs: Any):
        """Load a persisted component from ``model_dir``.

        Args:
            meta: Component metadata; ``"classifier_file"`` names the file
                written by ``persist``.
            model_dir: Directory containing the persisted file.
            model_metadata: Unused.
            cached_component: Unused.
            **kwargs: Unused.

        Returns:
            The restored component, or a fresh untrained instance when
            ``model_dir`` or the file name is missing.
        """
        file_name = meta.get("classifier_file")
        if not model_dir or not file_name:
            return cls(meta)

        classifier_file = os.path.join(model_dir, file_name)
        try:
            # NOTE: unpickling can execute arbitrary code; only load model
            # directories that come from a trusted source.
            with open(classifier_file, 'rb') as f:
                return pickle.load(f)
        except pickle.UnpicklingError:
            # Not a binary pickle: fall back to the JSON-based format that
            # earlier versions of ``persist`` produced.
            return utils.json_unpickle(classifier_file)

@prithvini04 prithvini04 commented Jun 24, 2019

Can you please tell me what self.clf is and where it is defined?


@BREN1234 BREN1234 commented Jul 17, 2019

Hi JustinaPetr,
What format does "training_data" follow? Could you elaborate a little on that?



@vba34520 vba34520 commented May 14, 2020

Hi, I have read "How to Enhance Rasa NLU Models with Custom Components". The tutorial is useful for me; thank you for your work!

I trained with the defaults, but the entity returned for "Hello stupid bot" is "pos".

May you share the please?

Thank you very much, looking forward to your reply, good day.


@vba34520 vba34520 commented May 15, 2020

Input: Hello stupid bot
Output: neg with confidence of 0.333...

Python: 3.6.4
rasa: 1.18
rasa-sdk: 1.1.1

I found that after train() and before persist(), the clf has been correctly trained.

So the problem is utils.json_pickle in persist().

I changed it to:

with open(classifier_file, 'wb') as f:
    pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

and changed load() to:

with open(classifier_file, 'rb') as f:
        return pickle.load(f)

it works!

