avinash010/sentiment_analysis.py

## sentiment_analysis.py
"""
Compare the model performance of Roberta model against ChatGPT model for tweet dataset
"""

import pandas as pd
from sentiment_analysis_gpt import SentimentAnalyzerGPT
from sentiment_analysis_roberta import SentimentAnalyzerROBERTA
import csv

class SentimentAnalyzer:
    """Compare RoBERTa and GPT sentiment predictions for labelled tweets.

    Tweet records are ``[ID, Tweet, Sentiment]`` lists as produced by
    ``load_tweets_from_csv``. Every model-result mapping handled by this
    class is treated as ``{tweet ID: predicted label}``.
    """

    def __init__(self):
        """Initialize the RoBERTa and GPT sentiment analyzers."""
        self.roberta_sentiment_analyzer = SentimentAnalyzerROBERTA()
        self.gpt_sentiment_analyzer = SentimentAnalyzerGPT()

    @staticmethod
    def _tweets_by_id(tweets):
        """Map each tweet ID (first field) to its full tweet record."""
        return {record[0]: record for record in tweets}

    @staticmethod
    def _normalize(label):
        """Upper-case a label for comparison; a missing label becomes ''."""
        return '' if label is None else str(label).upper()

    def load_tweets_from_csv(self, csv_file):
        """Load tweets from a CSV file.

        Args:
            csv_file: Anything ``pandas.read_csv`` accepts. The file must
                contain 'ID', 'Tweet' and 'Sentiment' columns.

        Returns:
            A list of ``[ID, Tweet, Sentiment]`` records, one per CSV row.
        """
        df = pd.read_csv(csv_file)
        return df[['ID', 'Tweet', 'Sentiment']].values.tolist()

    def analyse_sentiment_roberta(self, tweets):
        """Return the RoBERTa analyzer's classification of ``tweets``."""
        return self.roberta_sentiment_analyzer.classify_roberta_sentiment(tweets)

    def analyze_sentiment_gpt(self, tweets):
        """Return the GPT analyzer's classification of ``tweets``."""
        return self.gpt_sentiment_analyzer.classify_gpt_sentiment(tweets)

    def analyze_sentiment(self, csv_file):
        """Load ``csv_file`` and classify its tweets with both models.

        Returns:
            A ``(roberta_results, gpt_results)`` tuple.
        """
        tweets = self.load_tweets_from_csv(csv_file)
        roberta_results = self.analyse_sentiment_roberta(tweets)
        gpt_results = self.analyze_sentiment_gpt(tweets)
        return roberta_results, gpt_results

    def get_mismatched_model_sentiments(self, model_results, tweets):
        """Find tweets whose predicted sentiment differs from the labelled one.

        The comparison is case-insensitive.

        Args:
            model_results: Mapping of tweet ID to predicted label.
            tweets: List of ``[ID, Tweet, Sentiment]`` records.

        Returns:
            A list of ``(tweet_id, tweet_text, actual_sentiment, model_result)``
            tuples where ``actual_sentiment`` is upper-cased and
            ``model_result`` is the prediction exactly as given.

        Raises:
            KeyError: If ``model_results`` contains an ID absent from ``tweets``.
        """
        # Look records up by ID rather than by list position, so IDs do not
        # have to be 0..n-1 in file order.
        tweets_by_id = self._tweets_by_id(tweets)
        mismatched_tweets = []
        for tweet_id, model_result in model_results.items():
            _, tweet_text, labelled = tweets_by_id[tweet_id]
            actual_sentiment = self._normalize(labelled)
            if self._normalize(model_result) != actual_sentiment:
                mismatched_tweets.append(
                    (tweet_id, tweet_text, actual_sentiment, model_result))
        return mismatched_tweets

    def get_mismatched_tweets_between_models(self, model1_results, model2_results, tweets):
        """Find tweets on which the two models disagree.

        The comparison is case-insensitive. A tweet that has no prediction in
        ``model2_results`` is reported as a mismatch with ``None`` as the
        second model's result.

        Args:
            model1_results: Mapping of tweet ID to the first model's label.
            model2_results: Mapping of tweet ID to the second model's label.
            tweets: List of ``[ID, Tweet, Sentiment]`` records.

        Returns:
            A list of ``(tweet_id, tweet_text, model1_result, model2_result)``
            tuples.

        Raises:
            KeyError: If a mismatching ID is absent from ``tweets``.
        """
        tweets_by_id = self._tweets_by_id(tweets)
        mismatched_tweets = []
        for tweet_id, model1_result in model1_results.items():
            model2_result = model2_results.get(tweet_id)
            if self._normalize(model1_result) != self._normalize(model2_result):
                tweet_text = tweets_by_id[tweet_id][1]
                mismatched_tweets.append(
                    (tweet_id, tweet_text, model1_result, model2_result))
        return mismatched_tweets

    def save_sentiments_to_csv(self, csv_file, tweets, model1_results, model2_results):
        """Write every tweet with its labelled and predicted sentiments to a CSV.

        Args:
            csv_file: Output path; an existing file is overwritten.
            tweets: List of ``[ID, Tweet, Sentiment]`` records.
            model1_results: Mapping of tweet ID to the first model's label.
            model2_results: Mapping of tweet ID to the second model's label.

        A tweet without a prediction gets an empty cell for that model.
        """
        with open(csv_file, 'w', encoding="utf8", newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['ID', 'Tweet', 'Actual Sentiment',
                             'Model1 Sentiment', 'Model2 Sentiment'])
            for tweet_id, tweet, actual_sentiment in tweets:
                writer.writerow([tweet_id, tweet, actual_sentiment,
                                 model1_results.get(tweet_id, ''),
                                 model2_results.get(tweet_id, '')])

CSV_FILE_NAME = "fifa_world_cup_2022_tweets.csv"
OUTPUT_CSV_FILE = "sentiment_comparison.csv"

analyzer = SentimentAnalyzer()

# Read the CSV once and classify that same list, instead of re-reading the
# file through analyze_sentiment(); the records compared below are then
# exactly the ones the models saw.
all_tweets = analyzer.load_tweets_from_csv(CSV_FILE_NAME)
roberta_analyze_results = analyzer.analyse_sentiment_roberta(all_tweets)
gpt_analyze_results = analyzer.analyze_sentiment_gpt(all_tweets)

# Save every tweet with its labelled and predicted sentiments
analyzer.save_sentiments_to_csv(
    OUTPUT_CSV_FILE, all_tweets, roberta_analyze_results, gpt_analyze_results)
print("Sentiments saved to csv file")

# Get the mismatched ROBERTA sentiments with Actual sentiments
mismatched_roberta_sentiments = analyzer.get_mismatched_model_sentiments(
    roberta_analyze_results, all_tweets)
print("No of mismatched ROBERTA Sentiments with Actual Sentiment:",
      len(mismatched_roberta_sentiments))

# Get the mismatched GPT sentiments with Actual sentiments
mismatched_gpt_sentiments = analyzer.get_mismatched_model_sentiments(
    gpt_analyze_results, all_tweets)
print("No of mismatched GPT Sentiments with Actual Sentiment:",
      len(mismatched_gpt_sentiments))

# Get the mismatched tweets between ROBERTA And GPT model
mismatched_model_sentiments = analyzer.get_mismatched_tweets_between_models(
    roberta_analyze_results, gpt_analyze_results, all_tweets)
print("No of mismatched Sentiments between ROBERTA and GPT:",
      len(mismatched_model_sentiments))

## sentiment_analysis_gpt.py
"""
Using Chat GPT for sentiment analysis
"""
import os
import openai

class SentimentAnalyzerGPT:
    """Classify tweet sentiment with an OpenAI completion model."""
    # The API key is read from the OPENAI_API_KEY environment variable when
    # the class body is executed.
    openai.api_key = os.environ.get('OPENAI_API_KEY')

    def classify_gpt_sentiment(self, tweets):
        """Classify every tweet with the GPT model.

        Args:
            tweets: Iterable of ``(tweet_id, tweet_text, sentiment)`` records;
                the third field is ignored.

        Returns:
            A dict mapping each tweet ID to the text returned by
            ``generate_response`` for that tweet (one API call per tweet).
        """
        return {
            tweet_id: self.generate_response(tweet)
            for tweet_id, tweet, _ in tweets
        }

    def generate_response(self, dynamic_message):
        """Ask the completion model for the sentiment of one tweet.

        Args:
            dynamic_message: The tweet text appended to the fixed prompt.

        Returns:
            The stripped completion text. If it spans several lines, only the
            text after the first newline is kept.
        """
        prompt = "Please analyze the sentiment for the following football World Cup tweet and classify it as either POSITIVE, NEGATIVE, or NEUTRAL only. Ensure that the GPT response contains only the sentiment classifier in all caps, without any unnecessary characters or special symbols."
        input_text = f"{prompt} {dynamic_message}"

        response = openai.Completion.create(
            model='text-davinci-003',
            prompt=input_text,
            # Sampling temperature 0 so the same tweet gets the same label
            # on every run instead of a randomly sampled one.
            temperature=0,
        )
        # Extract the generated text from the API response
        generated_text = response.choices[0].text.strip()
        # Drop any leading line the model emitted before the label
        if '\n' in generated_text:
            generated_text = generated_text.split('\n', 1)[-1].strip()
        return generated_text

## sentiment_analysis_roberta.py
"""
Roberta base Model "cardiffnlp/twitter-roberta-base-sentiment-latest" for sentiment analysis
"""
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

class SentimentAnalyzerROBERTA:
    """Classify tweet sentiment with a pretrained Twitter RoBERTa model."""
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    # The model, tokenizer and pipeline are class attributes, so they are
    # loaded once, when the class body is executed.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    def classify_roberta_sentiment(self, tweets):
        """Classify every tweet with the RoBERTa pipeline.

        Args:
            tweets: List of ``(tweet_id, tweet_text, sentiment)`` records;
                the third field is ignored.

        Returns:
            A dict mapping each tweet ID to the ``'label'`` field of the
            pipeline's result for that tweet.
        """
        if not tweets:
            # Nothing to classify; skip the model call.
            return {}
        results = self.classifier([tweet for _, tweet, _ in tweets])
        # Key by tweet ID (not list position) so the mapping lines up with
        # the ID-keyed lookups used for the GPT results and the CSV export.
        return {
            tweet_id: result['label']
            for (tweet_id, _, _), result in zip(tweets, results)
        }
	"""
	Compare the model performance of Roberta model against ChatGPT model for tweet dataset
	"""

	import pandas as pd
	from sentiment_analysis_gpt import SentimentAnalyzerGPT
	from sentiment_analysis_roberta import SentimentAnalyzerROBERTA
	import csv

	class SentimentAnalyzer:
	    """Compare RoBERTa and GPT sentiment predictions for labelled tweets.

	    Tweet records are ``[ID, Tweet, Sentiment]`` lists as produced by
	    ``load_tweets_from_csv``. Every model-result mapping handled by this
	    class is treated as ``{tweet ID: predicted label}``.
	    """

	    def __init__(self):
	        """Initialize the RoBERTa and GPT sentiment analyzers."""
	        self.roberta_sentiment_analyzer = SentimentAnalyzerROBERTA()
	        self.gpt_sentiment_analyzer = SentimentAnalyzerGPT()

	    @staticmethod
	    def _tweets_by_id(tweets):
	        """Map each tweet ID (first field) to its full tweet record."""
	        return {record[0]: record for record in tweets}

	    @staticmethod
	    def _normalize(label):
	        """Upper-case a label for comparison; a missing label becomes ''."""
	        return '' if label is None else str(label).upper()

	    def load_tweets_from_csv(self, csv_file):
	        """Load tweets from a CSV file.

	        Args:
	            csv_file: Anything ``pandas.read_csv`` accepts. The file must
	                contain 'ID', 'Tweet' and 'Sentiment' columns.

	        Returns:
	            A list of ``[ID, Tweet, Sentiment]`` records, one per CSV row.
	        """
	        df = pd.read_csv(csv_file)
	        return df[['ID', 'Tweet', 'Sentiment']].values.tolist()

	    def analyse_sentiment_roberta(self, tweets):
	        """Return the RoBERTa analyzer's classification of ``tweets``."""
	        return self.roberta_sentiment_analyzer.classify_roberta_sentiment(tweets)

	    def analyze_sentiment_gpt(self, tweets):
	        """Return the GPT analyzer's classification of ``tweets``."""
	        return self.gpt_sentiment_analyzer.classify_gpt_sentiment(tweets)

	    def analyze_sentiment(self, csv_file):
	        """Load ``csv_file`` and classify its tweets with both models.

	        Returns:
	            A ``(roberta_results, gpt_results)`` tuple.
	        """
	        tweets = self.load_tweets_from_csv(csv_file)
	        roberta_results = self.analyse_sentiment_roberta(tweets)
	        gpt_results = self.analyze_sentiment_gpt(tweets)
	        return roberta_results, gpt_results

	    def get_mismatched_model_sentiments(self, model_results, tweets):
	        """Find tweets whose predicted sentiment differs from the labelled one.

	        The comparison is case-insensitive.

	        Args:
	            model_results: Mapping of tweet ID to predicted label.
	            tweets: List of ``[ID, Tweet, Sentiment]`` records.

	        Returns:
	            A list of ``(tweet_id, tweet_text, actual_sentiment, model_result)``
	            tuples where ``actual_sentiment`` is upper-cased and
	            ``model_result`` is the prediction exactly as given.

	        Raises:
	            KeyError: If ``model_results`` contains an ID absent from ``tweets``.
	        """
	        # Look records up by ID rather than by list position, so IDs do not
	        # have to be 0..n-1 in file order.
	        tweets_by_id = self._tweets_by_id(tweets)
	        mismatched_tweets = []
	        for tweet_id, model_result in model_results.items():
	            _, tweet_text, labelled = tweets_by_id[tweet_id]
	            actual_sentiment = self._normalize(labelled)
	            if self._normalize(model_result) != actual_sentiment:
	                mismatched_tweets.append(
	                    (tweet_id, tweet_text, actual_sentiment, model_result))
	        return mismatched_tweets

	    def get_mismatched_tweets_between_models(self, model1_results, model2_results, tweets):
	        """Find tweets on which the two models disagree.

	        The comparison is case-insensitive. A tweet that has no prediction in
	        ``model2_results`` is reported as a mismatch with ``None`` as the
	        second model's result.

	        Args:
	            model1_results: Mapping of tweet ID to the first model's label.
	            model2_results: Mapping of tweet ID to the second model's label.
	            tweets: List of ``[ID, Tweet, Sentiment]`` records.

	        Returns:
	            A list of ``(tweet_id, tweet_text, model1_result, model2_result)``
	            tuples.

	        Raises:
	            KeyError: If a mismatching ID is absent from ``tweets``.
	        """
	        tweets_by_id = self._tweets_by_id(tweets)
	        mismatched_tweets = []
	        for tweet_id, model1_result in model1_results.items():
	            model2_result = model2_results.get(tweet_id)
	            if self._normalize(model1_result) != self._normalize(model2_result):
	                tweet_text = tweets_by_id[tweet_id][1]
	                mismatched_tweets.append(
	                    (tweet_id, tweet_text, model1_result, model2_result))
	        return mismatched_tweets

	    def save_sentiments_to_csv(self, csv_file, tweets, model1_results, model2_results):
	        """Write every tweet with its labelled and predicted sentiments to a CSV.

	        Args:
	            csv_file: Output path; an existing file is overwritten.
	            tweets: List of ``[ID, Tweet, Sentiment]`` records.
	            model1_results: Mapping of tweet ID to the first model's label.
	            model2_results: Mapping of tweet ID to the second model's label.

	        A tweet without a prediction gets an empty cell for that model.
	        """
	        with open(csv_file, 'w', encoding="utf8", newline='') as file:
	            writer = csv.writer(file)
	            writer.writerow(['ID', 'Tweet', 'Actual Sentiment',
	                             'Model1 Sentiment', 'Model2 Sentiment'])
	            for tweet_id, tweet, actual_sentiment in tweets:
	                writer.writerow([tweet_id, tweet, actual_sentiment,
	                                 model1_results.get(tweet_id, ''),
	                                 model2_results.get(tweet_id, '')])

	CSV_FILE_NAME = "fifa_world_cup_2022_tweets.csv"
	OUTPUT_CSV_FILE = "sentiment_comparison.csv"

	analyzer = SentimentAnalyzer()

	# Read the CSV once and classify that same list, instead of re-reading the
	# file through analyze_sentiment(); the records compared below are then
	# exactly the ones the models saw.
	all_tweets = analyzer.load_tweets_from_csv(CSV_FILE_NAME)
	roberta_analyze_results = analyzer.analyse_sentiment_roberta(all_tweets)
	gpt_analyze_results = analyzer.analyze_sentiment_gpt(all_tweets)

	# Save every tweet with its labelled and predicted sentiments
	analyzer.save_sentiments_to_csv(
	    OUTPUT_CSV_FILE, all_tweets, roberta_analyze_results, gpt_analyze_results)
	print("Sentiments saved to csv file")

	# Get the mismatched ROBERTA sentiments with Actual sentiments
	mismatched_roberta_sentiments = analyzer.get_mismatched_model_sentiments(
	    roberta_analyze_results, all_tweets)
	print("No of mismatched ROBERTA Sentiments with Actual Sentiment:",
	      len(mismatched_roberta_sentiments))

	# Get the mismatched GPT sentiments with Actual sentiments
	mismatched_gpt_sentiments = analyzer.get_mismatched_model_sentiments(
	    gpt_analyze_results, all_tweets)
	print("No of mismatched GPT Sentiments with Actual Sentiment:",
	      len(mismatched_gpt_sentiments))

	# Get the mismatched tweets between ROBERTA And GPT model
	mismatched_model_sentiments = analyzer.get_mismatched_tweets_between_models(
	    roberta_analyze_results, gpt_analyze_results, all_tweets)
	print("No of mismatched Sentiments between ROBERTA and GPT:",
	      len(mismatched_model_sentiments))
	"""
	Using Chat GPT for sentiment analysis
	"""
	import os
	import openai

	class SentimentAnalyzerGPT:
	    """Classify tweet sentiment with an OpenAI completion model."""
	    # The API key is read from the OPENAI_API_KEY environment variable when
	    # the class body is executed.
	    openai.api_key = os.environ.get('OPENAI_API_KEY')

	    def classify_gpt_sentiment(self, tweets):
	        """Classify every tweet with the GPT model.

	        Args:
	            tweets: Iterable of ``(tweet_id, tweet_text, sentiment)`` records;
	                the third field is ignored.

	        Returns:
	            A dict mapping each tweet ID to the text returned by
	            ``generate_response`` for that tweet (one API call per tweet).
	        """
	        return {
	            tweet_id: self.generate_response(tweet)
	            for tweet_id, tweet, _ in tweets
	        }

	    def generate_response(self, dynamic_message):
	        """Ask the completion model for the sentiment of one tweet.

	        Args:
	            dynamic_message: The tweet text appended to the fixed prompt.

	        Returns:
	            The stripped completion text. If it spans several lines, only the
	            text after the first newline is kept.
	        """
	        prompt = "Please analyze the sentiment for the following football World Cup tweet and classify it as either POSITIVE, NEGATIVE, or NEUTRAL only. Ensure that the GPT response contains only the sentiment classifier in all caps, without any unnecessary characters or special symbols."
	        input_text = f"{prompt} {dynamic_message}"

	        response = openai.Completion.create(
	            model='text-davinci-003',
	            prompt=input_text,
	            # Sampling temperature 0 so the same tweet gets the same label
	            # on every run instead of a randomly sampled one.
	            temperature=0,
	        )
	        # Extract the generated text from the API response
	        generated_text = response.choices[0].text.strip()
	        # Drop any leading line the model emitted before the label
	        if '\n' in generated_text:
	            generated_text = generated_text.split('\n', 1)[-1].strip()
	        return generated_text
	"""
	Roberta base Model "cardiffnlp/twitter-roberta-base-sentiment-latest" for sentiment analysis
	"""
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

	class SentimentAnalyzerROBERTA:
	    """Classify tweet sentiment with a pretrained Twitter RoBERTa model."""
	    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
	    # The model, tokenizer and pipeline are class attributes, so they are
	    # loaded once, when the class body is executed.
	    model = AutoModelForSequenceClassification.from_pretrained(model_name)
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

	    def classify_roberta_sentiment(self, tweets):
	        """Classify every tweet with the RoBERTa pipeline.

	        Args:
	            tweets: List of ``(tweet_id, tweet_text, sentiment)`` records;
	                the third field is ignored.

	        Returns:
	            A dict mapping each tweet ID to the ``'label'`` field of the
	            pipeline's result for that tweet.
	        """
	        if not tweets:
	            # Nothing to classify; skip the model call.
	            return {}
	        results = self.classifier([tweet for _, tweet, _ in tweets])
	        # Key by tweet ID (not list position) so the mapping lines up with
	        # the ID-keyed lookups used for the GPT results and the CSV export.
	        return {
	            tweet_id: result['label']
	            for (tweet_id, _, _), result in zip(tweets, results)
	        }