Facebook Conversation Scraper
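Run the JavaScript below in the browser's DevTools console while the Messenger conversation is open; it scrolls through the loaded messages and collects them into messageData, tagging each one as sent by you ("right") or by the other person ("left"). The Python section further down converts the exported messages into a DataFrame and runs a Keras emotion classifier over them.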
let talker = 'left';
let messageData = [];

// Helper function to add a delay (in milliseconds)
const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));

// Select the third grid element (index 2, since querySelectorAll is 0-based)
let grid = document.querySelectorAll('div[role="grid"]')[2];

// Check whether a message is already in the messageData array
const isMessageInList = (text) => {
  return messageData.some(message => message.text === text);
};

// Process messages with a 1-second delay between each scroll
async function processMessages() {
  const chats = grid.querySelectorAll(".x78zum5");
  for (let i = 0; i < chats.length; i++) {
    const chat = chats[i];
    let text = chat.querySelector(".x1gslohp")?.innerText || '';

    // Scroll to the chat element so lazy-loaded content renders
    chat.scrollIntoView();

    // Determine if the message is from "You sent" (right) or the other user (left)
    if (chat.querySelector("h4 span")?.innerText === "You sent") {
      talker = "right";
    } else {
      talker = "left"; // Reset to left for other messages
    }

    // Add the message if it's not already in the list
    if (text && !isMessageInList(text)) {
      messageData.push({ talker, text });
    } else {
      console.log({ text, reason: "Duplicate or empty message skipped" });
    }

    // Wait 1 second before proceeding to the next element
    await delay(1000);
  }
  console.log(messageData);
}

// Run the scraper
processMessages();
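# --- Getting the scraped messages into Python (a minimal sketch, not part of the original gist) ---
# One way: in the DevTools console run copy(JSON.stringify(messageData)) and paste
# the result into a file; the file name 'messages.json' below is an assumption.
import json

with open('messages.json') as f:
    data = json.load(f)  # list of {"talker": ..., "text": ...} records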
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 'data' is the list of {talker, text} records produced by the scraper above

# Convert the JSON records to a DataFrame
df = pd.DataFrame(data)

# Word count per message
df['word_count'] = df['text'].apply(lambda x: len(x.split()))

# Message count per speaker
chat_distribution = df['talker'].value_counts()
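# Quick sanity check (an optional addition, not in the original gist):
# how many messages and how many words each side contributed.
print(chat_distribution)
print(df.groupby('talker')['word_count'].sum())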
# Prepare the text for emotion analysis with the Keras tokenizer
max_words = 10000  # Limit vocabulary size
max_len = 100      # Limit sequence length

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['text'])

# Convert text to sequences of token ids
sequences = tokenizer.texts_to_sequences(df['text'])

# Pad sequences to a uniform input shape
data_pad = pad_sequences(sequences, maxlen=max_len)

# Load or build a tf.keras model for emotion classification.
# The model below is freshly initialized; load pre-trained weights or train it
# with emotion_model.fit() on labeled data before trusting its predictions.
emotion_model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(max_words, 128, input_length=max_len),
    tf.keras.layers.LSTM(64, return_sequences=False),
    tf.keras.layers.Dense(8, activation='softmax')  # 8 possible emotions
])

# Compile the model (adjust as needed if you load a pre-trained one)
emotion_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# emotion_model.load_weights("path_to_pretrained_model_weights.h5")

# Predict emotions for each message
emotion_predictions = emotion_model.predict(data_pad)

# Emotion labels, one per output unit (including several negative emotions)
emotion_labels = ['anger', 'fear', 'joy', 'love', 'sadness', 'surprise', 'disgust', 'frustration']

# Map each prediction to its highest-probability emotion label
df['emotion'] = [emotion_labels[np.argmax(pred)] for pred in emotion_predictions]
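# Peek at the labeled messages (an optional addition); the labels are only
# meaningful once the model above has been trained or loaded with real weights.
print(df[['talker', 'text', 'emotion']].head())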
import matplotlib.pyplot as plt
import seaborn as sns

# Group by speaker and emotion to get each speaker's emotion distribution
emotion_distribution_by_speaker = df.groupby(['talker', 'emotion']).size().unstack().fillna(0)

# Visualize the emotion distribution by speaker
# (DataFrame.plot creates its own figure, so a separate plt.figure() is not needed)
emotion_distribution_by_speaker.plot(kind='bar', stacked=True, figsize=(12, 8))
plt.title("Emotion Distribution by Speaker")
plt.xlabel("Speaker")
plt.ylabel("Emotion Count")
plt.legend(title="Emotion", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
# To visualize this separately for each speaker:
for speaker in df['talker'].unique():
    plt.figure(figsize=(10, 6))
    speaker_data = df[df['talker'] == speaker]
    emotion_counts = speaker_data['emotion'].value_counts()
    sns.barplot(x=emotion_counts.index, y=emotion_counts.values)
    plt.title(f"Emotion Distribution for {speaker}")
    plt.xlabel("Emotion")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
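# Optionally persist the labeled conversation for later analysis
# (the file name here is just an example, not part of the original gist).
df.to_csv('conversation_with_emotions.csv', index=False)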