Last active
July 18, 2023 15:47
-
-
Save avinash010/03fc079ad0c9ce8144e67ed54369c2cd to your computer and use it in GitHub Desktop.
Sentiment Analysis compare models
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Compare the model performance of Roberta model against ChatGPT model for tweet dataset | |
""" | |
import pandas as pd | |
from sentiment_analysis_gpt import SentimentAnalyzerGPT | |
from sentiment_analysis_roberta import SentimentAnalyzerROBERTA | |
import csv | |
class SentimentAnalyzer:
    """Run the RoBERTa and GPT sentiment models on tweets and compare them.

    Tweets are passed around as ``[ID, Tweet, Sentiment]`` records (see
    ``load_tweets_from_csv``). Model results are dicts mapping a tweet key
    to the label that model predicted for the tweet.
    """

    def __init__(self):
        """Initialize the RoBERTa and GPT analyzers."""
        self.roberta_sentiment_analyzer = SentimentAnalyzerROBERTA()
        self.gpt_sentiment_analyzer = SentimentAnalyzerGPT()

    def load_tweets_from_csv(self, csv_file):
        """Load tweets from a CSV file.

        The file must contain ``ID``, ``Tweet`` and ``Sentiment`` columns.

        Returns:
            A list with one ``[ID, Tweet, Sentiment]`` list per CSV row.
        """
        df = pd.read_csv(csv_file)
        return df[['ID', 'Tweet', 'Sentiment']].values.tolist()

    def analyse_sentiment_roberta(self, tweets):
        """Classify the given tweet records with the RoBERTa model."""
        return self.roberta_sentiment_analyzer.classify_roberta_sentiment(tweets)

    # Alias with the spelling used by every other method of this class; the
    # original "analyse" name is kept so existing callers keep working.
    analyze_sentiment_roberta = analyse_sentiment_roberta

    def analyze_sentiment_gpt(self, tweets):
        """Classify the given tweet records with the GPT model."""
        return self.gpt_sentiment_analyzer.classify_gpt_sentiment(tweets)

    def analyze_sentiment(self, csv_file):
        """Load tweets from ``csv_file`` and classify them with both models.

        Returns:
            A ``(roberta_results, gpt_results)`` tuple.
        """
        tweets = self.load_tweets_from_csv(csv_file)
        roberta_results = self.analyse_sentiment_roberta(tweets)
        gpt_results = self.analyze_sentiment_gpt(tweets)
        return roberta_results, gpt_results

    @staticmethod
    def _index_by_id(tweets):
        """Map each record's ID (its first field) to the record itself."""
        return {record[0]: record for record in tweets}

    @staticmethod
    def _find_tweet(records_by_id, tweets, key):
        """Return the tweet record a model-result key refers to.

        The key is matched against the tweet IDs first. A key that is not a
        known ID is treated as a position in ``tweets``, which keeps results
        keyed by list index working and raises the same ``IndexError`` /
        ``TypeError`` as plain indexing for keys that match nothing.
        """
        try:
            return records_by_id[key]
        except KeyError:
            return tweets[key]

    @staticmethod
    def _labels_differ(first, second):
        """Compare two labels case-insensitively; a missing label differs."""
        if first is None or second is None:
            return first is not second
        return first.upper() != second.upper()

    def get_mismatched_model_sentiments(self, model_results, tweets):
        """Get tweets whose model label differs from the actual sentiment.

        Args:
            model_results: dict mapping tweet key to the predicted label.
            tweets: list of ``[ID, Tweet, Sentiment]`` records.

        Returns:
            A list of ``(key, tweet_text, actual_sentiment, model_result)``
            tuples, where ``actual_sentiment`` is upper-cased.
        """
        records_by_id = self._index_by_id(tweets)
        mismatched_tweets = []
        for tweet_id, model_result in model_results.items():
            record = self._find_tweet(records_by_id, tweets, tweet_id)
            # Upper-case both sides in case a label arrived in lowercase.
            actual_sentiment = record[2].upper()
            if self._labels_differ(model_result, actual_sentiment):
                mismatched_tweets.append(
                    (tweet_id, record[1], actual_sentiment, model_result))
        return mismatched_tweets

    def get_mismatched_tweets_between_models(self, model1_results, model2_results, tweets):
        """Get tweets on which the two models disagree.

        A tweet that has no entry in ``model2_results`` is reported as a
        mismatch with ``None`` as the second label instead of raising.

        Returns:
            A list of ``(key, tweet_text, model1_result, model2_result)``
            tuples.
        """
        records_by_id = self._index_by_id(tweets)
        mismatched_tweets = []
        for tweet_id, model1_result in model1_results.items():
            model2_result = model2_results.get(tweet_id)
            if self._labels_differ(model1_result, model2_result):
                tweet_text = self._find_tweet(records_by_id, tweets, tweet_id)[1]
                mismatched_tweets.append(
                    (tweet_id, tweet_text, model1_result, model2_result))
        return mismatched_tweets

    def save_sentiments_to_csv(self, csv_file, tweets, model1_results, model2_results):
        """Save the actual and predicted sentiments to a common CSV file.

        Each tweet's predictions are looked up by its ID; a model without a
        prediction for that ID gets an empty cell.
        """
        with open(csv_file, 'w', encoding="utf8", newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['ID', 'Tweet', 'Actual Sentiment',
                             'Model1 Sentiment', 'Model2 Sentiment'])
            for tweet_id, tweet, actual_sentiment in tweets:
                writer.writerow([tweet_id, tweet, actual_sentiment,
                                 model1_results.get(tweet_id, ''),
                                 model2_results.get(tweet_id, '')])
# Script: classify every tweet with both models, save the labels side by
# side, and report how often each model disagrees with the actual sentiment
# and with the other model.
analyzer = SentimentAnalyzer()
CSV_FILE_NAME = "fifa_world_cup_2022_tweets.csv"
all_tweets = analyzer.load_tweets_from_csv(CSV_FILE_NAME)
# Classify the records that are already loaded instead of calling
# analyze_sentiment(CSV_FILE_NAME), which would read the same CSV a second time.
roberta_analyze_results = analyzer.analyse_sentiment_roberta(all_tweets)
gpt_analyze_results = analyzer.analyze_sentiment_gpt(all_tweets)
OUTPUT_CSV_FILE = "sentiment_comparison.csv"
analyzer.save_sentiments_to_csv(
    OUTPUT_CSV_FILE, all_tweets, roberta_analyze_results, gpt_analyze_results)
print("Sentiments saved to csv file")
# Get the mismatched ROBERTA sentiments with Actual sentiments
mismatched_roberta_sentiments = analyzer.get_mismatched_model_sentiments(
    roberta_analyze_results, all_tweets)
print("No of mismatched ROBERTA Sentiments with Actual Sentiment:",
      len(mismatched_roberta_sentiments))
# Get the mismatched GPT sentiments with Actual sentiments
mismatched_gpt_sentiments = analyzer.get_mismatched_model_sentiments(
    gpt_analyze_results, all_tweets)
print("No of mismatched GPT Sentiments with Actual Sentiment:",
      len(mismatched_gpt_sentiments))
# Get the mismatched tweets between ROBERTA And GPT model
mismatched_model_sentiments = analyzer.get_mismatched_tweets_between_models(
    roberta_analyze_results, gpt_analyze_results, all_tweets)
print("No of mismatched Sentiments between ROBERTA and GPT:",
      len(mismatched_model_sentiments))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Using Chat GPT for sentiment analysis | |
""" | |
import os | |
import openai | |
class SentimentAnalyzerGPT:
    """Classify tweet sentiment with an OpenAI completion model."""

    # Set up your OpenAI API credentials. This runs once, when the class is
    # defined, and reads the key from the OPENAI_API_KEY environment variable.
    openai.api_key = os.environ.get('OPENAI_API_KEY')

    def classify_gpt_sentiment(self, tweets):
        """Classify sentiment using the GPT model.

        Args:
            tweets: iterable of ``(tweet_id, tweet_text, sentiment)`` records;
                the third field is ignored here.

        Returns:
            A dict mapping each tweet_id to the text the model returned.
            One API request is made per tweet.
        """
        return {
            tweet_id: self.generate_response(tweet)
            for tweet_id, tweet, _ in tweets
        }

    def generate_response(self, dynamic_message):
        """Send the fixed instruction plus ``dynamic_message`` to the model.

        Returns:
            The model's reply with surrounding whitespace stripped. If the
            reply still spans several lines, only the text after the first
            newline is kept.
        """
        prompt = "Please analyze the sentiment for the following football World Cup tweet and classify it as either POSITIVE, NEGATIVE, or NEUTRAL only. Ensure that the GPT response contains only the sentiment classifier in all caps, without any unnecessary characters or special symbols."
        input_text = f"{prompt} {dynamic_message}"
        # Define the parameters for the API call.
        # temperature=0 makes the model pick its most likely answer instead of
        # sampling, so the same tweet gets the same label on every run; the
        # comparison against the other model is otherwise not reproducible.
        response = openai.Completion.create(
            model='text-davinci-003',
            prompt=input_text,
            temperature=0,
        )
        # Extract the generated text from the API response
        generated_text = response.choices[0].text.strip()
        # Removing some extra characters in case present in generated GPT text
        if '\n' in generated_text:
            generated_text = generated_text.split('\n', 1)[-1].strip()
        return generated_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Roberta base Model "cardiffnlp/twitter-roberta-base-sentiment-latest" for sentiment analysis | |
""" | |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
class SentimentAnalyzerROBERTA:
    """Classify tweet sentiment with a pretrained RoBERTa model."""

    # The model, tokenizer and pipeline below are class attributes: they are
    # created once, when the class is defined, and shared by all instances.
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    # Use AutoModelForSequenceClassification and AutoTokenizer to load the pretrained model
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Specify the model and tokenizer in the pipeline()
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    def classify_roberta_sentiment(self, tweets):
        """Classify sentiment using the RoBERTa model.

        Args:
            tweets: iterable of ``(tweet_id, tweet_text, sentiment)`` records;
                the third field is ignored here.

        Returns:
            A dict mapping each tweet_id to the ``'label'`` the pipeline
            returned for that tweet. Keying by tweet_id (not list position)
            matches the GPT analyzer, so both result dicts can be joined on
            the same key. If two records share an ID, the later one wins.
        """
        records = list(tweets)
        if not records:
            # Nothing to classify; avoid calling the pipeline on an empty batch.
            return {}
        results = self.classifier([tweet for _, tweet, _ in records])
        return {
            tweet_id: result['label']
            for (tweet_id, _, _), result in zip(records, results)
        }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment