Sentiment Analysis: compare models
"""
Compare the model performance of Roberta model against ChatGPT model for tweet dataset
"""
import pandas as pd
from sentiment_analysis_gpt import SentimentAnalyzerGPT
from sentiment_analysis_roberta import SentimentAnalyzerROBERTA
import csv
class SentimentAnalyzer:
    "Class to analyze tweet sentiment with both models"

    def __init__(self):
        "Initialize the RoBERTa and GPT analyzers"
        self.roberta_sentiment_analyzer = SentimentAnalyzerROBERTA()
        self.gpt_sentiment_analyzer = SentimentAnalyzerGPT()

    def load_tweets_from_csv(self, csv_file):
        "Load tweets from a CSV file and return a list of [ID, Tweet, Sentiment] records"
        df = pd.read_csv(csv_file)
        return df[['ID', 'Tweet', 'Sentiment']].values.tolist()

    def analyse_sentiment_roberta(self, tweets):
        "Analyze sentiment using the RoBERTa model"
        roberta_result = self.roberta_sentiment_analyzer.classify_roberta_sentiment(tweets)
        return roberta_result

    def analyze_sentiment_gpt(self, tweets):
        "Analyze sentiment using the GPT model"
        gpt_results = self.gpt_sentiment_analyzer.classify_gpt_sentiment(tweets)
        return gpt_results

    def analyze_sentiment(self, csv_file):
        "Perform sentiment analysis using both the RoBERTa and GPT models"
        tweets = self.load_tweets_from_csv(csv_file)
        roberta_results = self.analyse_sentiment_roberta(tweets)
        gpt_results = self.analyze_sentiment_gpt(tweets)
        return roberta_results, gpt_results

    def get_mismatched_model_sentiments(self, model_results, tweets):
        "Get tweets whose model sentiment differs from the labelled (actual) sentiment"
        # Look tweets up by ID rather than by list position
        tweets_by_id = {tweet_id: (text, sentiment) for tweet_id, text, sentiment in tweets}
        mismatched_tweets = []
        for tweet_id, model_result in model_results.items():
            tweet_text, actual_sentiment = tweets_by_id[tweet_id]
            # Compare in uppercase in case a model returned lowercase labels
            if model_result.upper() != actual_sentiment.upper():
                mismatched_tweets.append(
                    (tweet_id, tweet_text, actual_sentiment.upper(), model_result))
        return mismatched_tweets

    def get_mismatched_tweets_between_models(self, model1_results, model2_results, tweets):
        "Get tweets with mismatched sentiments between two models"
        tweet_text_by_id = {tweet_id: text for tweet_id, text, _ in tweets}
        mismatched_tweets = []
        for tweet_id, model1_result in model1_results.items():
            model2_result = model2_results.get(tweet_id, '')
            if model1_result.upper() != model2_result.upper():
                mismatched_tweets.append(
                    (tweet_id, tweet_text_by_id[tweet_id], model1_result, model2_result))
        return mismatched_tweets

    def save_sentiments_to_csv(self, csv_file, tweets, model1_results, model2_results):
        "Save the actual and model sentiments to a common CSV file"
        with open(csv_file, 'w', encoding="utf8", newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['ID', 'Tweet', 'Actual Sentiment',
                             'Model1 Sentiment', 'Model2 Sentiment'])
            for tweet_id, tweet, actual_sentiment in tweets:
                model1_sentiment = model1_results.get(tweet_id, '')
                model2_sentiment = model2_results.get(tweet_id, '')
                writer.writerow([tweet_id, tweet, actual_sentiment,
                                 model1_sentiment, model2_sentiment])


analyzer = SentimentAnalyzer()
CSV_FILE_NAME = "fifa_world_cup_2022_tweets.csv"
all_tweets = analyzer.load_tweets_from_csv(CSV_FILE_NAME)
roberta_analyze_results, gpt_analyze_results = analyzer.analyze_sentiment(CSV_FILE_NAME)

OUTPUT_CSV_FILE = "sentiment_comparison.csv"
analyzer.save_sentiments_to_csv(
    OUTPUT_CSV_FILE, all_tweets, roberta_analyze_results, gpt_analyze_results)
print("Sentiments saved to CSV file")

# Tweets where the RoBERTa sentiment disagrees with the actual sentiment
mismatched_roberta_sentiments = analyzer.get_mismatched_model_sentiments(
    roberta_analyze_results, all_tweets)
print("Number of mismatched RoBERTa sentiments vs actual sentiment:",
      len(mismatched_roberta_sentiments))

# Tweets where the GPT sentiment disagrees with the actual sentiment
mismatched_gpt_sentiments = analyzer.get_mismatched_model_sentiments(
    gpt_analyze_results, all_tweets)
print("Number of mismatched GPT sentiments vs actual sentiment:",
      len(mismatched_gpt_sentiments))

# Tweets where the RoBERTa and GPT models disagree with each other
mismatched_model_sentiments = analyzer.get_mismatched_tweets_between_models(
    roberta_analyze_results, gpt_analyze_results, all_tweets)
print("Number of mismatched sentiments between RoBERTa and GPT:",
      len(mismatched_model_sentiments))
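
# The snippet below is an optional, minimal sketch (not part of the original
# comparison): it dumps each mismatch list to its own CSV so the disagreements
# can be reviewed by hand. The output file names are illustrative assumptions.
MISMATCH_FILES = {
    "mismatched_roberta_vs_actual.csv": mismatched_roberta_sentiments,
    "mismatched_gpt_vs_actual.csv": mismatched_gpt_sentiments,
    "mismatched_roberta_vs_gpt.csv": mismatched_model_sentiments,
}
for file_name, rows in MISMATCH_FILES.items():
    with open(file_name, 'w', encoding="utf8", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['ID', 'Tweet', 'Sentiment 1', 'Sentiment 2'])
        writer.writerows(rows)
    print(f"Saved {len(rows)} mismatches to {file_name}")
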
"""
Using Chat GPT for sentiment analysis
"""
import os
import openai
class SentimentAnalyzerGPT:
"Class to analyze GPT sentiment"
# Set up your OpenAI API credentials
openai.api_key = os.environ.get('OPENAI_API_KEY')

    def classify_gpt_sentiment(self, tweets):
        "Classify sentiment using the GPT model"
        gpt_results = {}
        for tweet_id, tweet, _ in tweets:
            gpt_result = self.generate_response(tweet)
            gpt_results[tweet_id] = gpt_result
        return gpt_results

    def generate_response(self, dynamic_message):
        "Combine the prompt with the tweet and ask the model for a sentiment label"
        prompt = ("Please analyze the sentiment for the following football World Cup "
                  "tweet and classify it as either POSITIVE, NEGATIVE, or NEUTRAL only. "
                  "Ensure that the GPT response contains only the sentiment classifier "
                  "in all caps, without any unnecessary characters or special symbols.")
        input_text = f"{prompt} {dynamic_message}"
        # Define the parameters for the API call
        response = openai.Completion.create(
            model='text-davinci-003',
            prompt=input_text
        )
        # Extract the generated text from the API response
        generated_text = response.choices[0].text.strip()
        # Drop any leading lines the model may echo before the sentiment label
        if '\n' in generated_text:
            generated_text = generated_text.split('\n', 1)[-1].strip()
        return generated_text
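
# Minimal sketch of how this class might be exercised on its own, assuming
# OPENAI_API_KEY is set in the environment; the sample tweet and its label
# are made up for illustration and are not part of the real dataset.
if __name__ == "__main__":
    sample_tweets = [(0, "What a final, absolutely loved every minute of it!", "POSITIVE")]
    print(SentimentAnalyzerGPT().classify_gpt_sentiment(sample_tweets))
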
"""
Roberta base Model "cardiffnlp/twitter-roberta-base-sentiment" for sentiment analysis
"""
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
class SentimentAnalyzerROBERTA:
"Class to analyze Bert sentiment"
model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
#Use AutoModelForSequenceClassification and AutoTokenizer to load the pretrained model
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
#Specify the model and tokenizer in the pipeline()
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    def classify_roberta_sentiment(self, tweets):
        "Classify sentiment using the RoBERTa model"
        roberta_results = {}
        # Classify all tweet texts in one batch, then key the results by tweet ID
        # so they line up with the GPT results and the original CSV rows
        results = self.classifier([tweet for _, tweet, _ in tweets])
        for (tweet_id, _, _), result in zip(tweets, results):
            roberta_results[tweet_id] = result['label']
        return roberta_results
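
# Minimal sketch of a standalone check for this class; the sample tweets and
# their labels are illustrative assumptions, not taken from the real dataset.
if __name__ == "__main__":
    sample_tweets = [
        (0, "That winning goal was unbelievable!", "POSITIVE"),
        (1, "Terrible refereeing ruined the match.", "NEGATIVE"),
    ]
    print(SentimentAnalyzerROBERTA().classify_roberta_sentiment(sample_tweets))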