Skip to content

Instantly share code, notes, and snippets.

@YuviGold
Created July 10, 2024 07:31
Show Gist options
  • Save YuviGold/850b7e5cab4b7e6264be591e184a7254 to your computer and use it in GitHub Desktop.
Save YuviGold/850b7e5cab4b7e6264be591e184a7254 to your computer and use it in GitHub Desktop.
Whatsapp Group Exporter
/*
Inspired by https://gist.github.com/shaneapen/3406477b9f946855d02e3f33ec121975
The script scrapes the members of a WhatsApp group chat and exports the data to a CSV file.
It scrolls automatically and extracts each list item in the members list with all the information available.
Then it joins this information with the IndexedDB data to find the groups each member is in
and whether the contact is already saved in the phone.
Steps:
1. Open WhatsApp Web
2. Open the group chat you want to scrape ->
Click on the group name to open the group info ->
Click on the members list
3. Open the browser console (F12)
4. Copy and paste the code below into the console and press Enter
After the script has finished running, a download link will appear for the CSV file containing the scraped data.
*/
// Delay in milliseconds between two automatic scroll steps (passed to setInterval).
const SCROLL_INTERVAL = 1000;
// Amount added to the members list's scrollTop on every scroll step.
const SCROLL_INCREMENT = 450;
// When true, start() schedules autoScroll on a timer.
const AUTO_SCROLL = true;
// When true, scraped rows are enriched with groups/contacts read from IndexedDB.
const CHECK_INDEXEDDB = true;

// Shared mutable state; every one of these is assigned by start().
var scrollInterval; // timer id returned by setInterval
var observer; // MutationObserver watching the members list
var membersList; // parent node of the <header> element
var header; // first <header> element found in the document
var MEMBERS_QUEUE; // scraped records keyed by phone (or by name when no phone is known)
/**
 * Promise-based, read-only accessor for the "model-storage" IndexedDB database.
 *
 * The connection is opened lazily on the first query and then reused.
 * NOTE(review): the store/index names below are presumably those used by
 * WhatsApp Web at the time of writing - confirm against the live database.
 */
class WhatsappDB {
  // Cached database handle; undefined until openConnection() succeeds once.
  #db;
  #dbName = "model-storage";
  #groupsCollection = "group-metadata";
  #contactsCollection = "contact";
  #phoneNumberIndex = "phoneNumber";
  #participantsCollection = "participant";
  #participantsIndex = "participants";

  /**
   * Adapts an IndexedDB request to a promise.
   * @param {IDBRequest} request - a freshly created request.
   * @returns {Promise<*>} resolves with the success event's `target.result`;
   *   rejects with the error event itself.
   */
  #awaitRequest(request) {
    return new Promise((resolve, reject) => {
      request.onerror = (event) => reject(event);
      request.onsuccess = (event) => resolve(event.target.result);
    });
  }

  /**
   * Returns the database handle, opening the database on first use.
   * @returns {Promise<IDBDatabase>} the cached handle.
   */
  async openConnection() {
    if (this.#db) {
      return this.#db;
    }
    this.#db = await this.#awaitRequest(indexedDB.open(this.#dbName));
    return this.#db;
  }

  /**
   * Runs getAll() on an object store, or on one of its indexes when `index` is given.
   * @param {string} collection - object store name.
   * @param {string} [index] - index name inside that store.
   * @param {*} [query] - key or key range forwarded to getAll().
   * @param {number} [count] - maximum number of records forwarded to getAll().
   * @returns {Promise<Array>} the matching records.
   */
  async #promisifyCol(collection, index, query, count) {
    const db = await this.openConnection();
    const store = db.transaction(collection, "readonly").objectStore(collection);
    const source = index ? store.index(index) : store;
    return this.#awaitRequest(source.getAll(query, count));
  }

  /** @returns {Promise<Array>} every record of the groups store. */
  async getGroups() {
    return this.#promisifyCol(this.#groupsCollection);
  }

  /**
   * @param {string} [key] - key looked up in the participants index.
   * @returns {Promise<Array>} participant records matching `key`.
   */
  async getParticipants(key) {
    return this.#promisifyCol(this.#participantsCollection, this.#participantsIndex, key);
  }

  /**
   * @param {string} [key] - key looked up in the phone-number index; omit to get all.
   * @returns {Promise<Array>} contact records reachable through that index.
   */
  async getContacts(key) {
    return this.#promisifyCol(this.#contactsCollection, this.#phoneNumberIndex, key);
  }

  /**
   * Builds the lookup key for a phone number: the first "+" is dropped and
   * "@c.us" is appended.
   * @param {string} phone
   * @returns {string}
   */
  phoneToKey(phone) {
    const withoutPlus = phone.replace('+', '');
    return `${withoutPlus}@c.us`;
  }
}
// Single shared database accessor. It is declared explicitly because assigning to an
// undeclared name creates an implicit global and throws a ReferenceError in strict
// mode / ES modules.
const whatsappDB = new WhatsappDB();
// Caches of IndexedDB records; start() fills them when CHECK_INDEXEDDB is enabled.
var groups, contacts;
// Falls back to the prefixed WebKit constructor when the standard one is missing.
MutationObserver = window.MutationObserver || window.WebKitMutationObserver;
/**
 * Performs one automatic scroll step on the element two siblings after the header:
 * advances it by SCROLL_INCREMENT, or calls stop() once its end has been reached.
 */
const autoScroll = function () {
  const scrollable = header.nextSibling.nextSibling;
  if (scrollEndReached(scrollable)) {
    stop();
    return;
  }
  scrollable.scrollTop += SCROLL_INCREMENT;
};
/**
 * Starts a scrape run.
 *
 * Resets MEMBERS_QUEUE, optionally preloads groups and contacts from IndexedDB,
 * locates the first <header> element, observes its parent for DOM changes
 * (re-scraping on every change), scrolls the list to the top, scrapes once and,
 * when AUTO_SCROLL is enabled, schedules autoScroll every SCROLL_INTERVAL ms.
 *
 * @returns {Promise<void>} resolves once the initial scrape is done and the
 *   auto-scroll timer (if any) is scheduled; scraping continues afterwards.
 * @throws {Error} if the document contains no <header> element.
 */
async function start() {
  MEMBERS_QUEUE = {};
  if (CHECK_INDEXEDDB) {
    groups = await whatsappDB.getGroups();
    contacts = await whatsappDB.getContacts();
  }
  header = document.getElementsByTagName('header')[0];
  if (!header) {
    // Fail with an actionable message instead of a TypeError on `header.parentNode`.
    throw new Error("Members list not found: open the group's members list before running the script.");
  }
  membersList = header.parentNode;
  // Fired when a mutation occurs; failures are reported instead of being left
  // as unhandled promise rejections.
  observer = new MutationObserver(() => {
    scrapeData().catch((error) => {
      console.error("Failed to scrape members after a list update:", error);
    });
  });
  // the div to watch for mutations
  observer.observe(membersList, {
    childList: true,
    subtree: true
  });
  // scroll to top before beginning
  header.nextSibling.nextSibling.scrollTop = 0;
  await scrapeData();
  if (AUTO_SCROLL) scrollInterval = setInterval(autoScroll, SCROLL_INTERVAL);
}
/**
 * Stops the current scrape instance: cancels the auto-scroll timer, disconnects
 * the mutation observer, prints the collected members and, when at least one
 * member was collected, triggers the CSV download ("whatsapp_members.csv").
 */
const stop = function () {
  window.clearInterval(scrollInterval);
  observer.disconnect();
  const members = Object.values(MEMBERS_QUEUE);
  console.table(MEMBERS_QUEUE);
  console.log(`Scrape stopped. ${members.length} members scraped.`);
  if (members.length === 0) {
    // Nothing was collected, so there is no header row or data to export.
    return;
  }
  createDownloadLink(convertToCSV(members), "whatsapp_members.csv");
};
/**
 * Scrapes every member row currently present in the members list and stores the
 * result in MEMBERS_QUEUE, keyed by phone number or, when no phone is known, by name.
 *
 * Rows named "You" are skipped. A row that cannot be parsed is logged and skipped
 * so that one bad entry does not abort the whole pass, and rows with neither a
 * phone nor a name are ignored instead of being stored under the key "null".
 *
 * @returns {Promise<void>}
 */
async function scrapeData() {
  const members = membersList.querySelectorAll('[role=listitem] > [role=button]');
  for (const member of members) {
    let details;
    try {
      details = await handleMember(member);
    } catch (error) {
      console.warn("Skipping a list entry that could not be parsed:", error);
      continue;
    }
    if (details.name === "You") {
      continue;
    }
    const key = details.phone || details.name;
    if (!key) {
      continue;
    }
    MEMBERS_QUEUE[key] = details;
  }
}
/**
 * Builds the exported record for one member row.
 *
 * The phone is taken from the row caption, else from a title starting with "+",
 * else from the saved contact whose name equals the title. When a phone is known
 * it is stripped of whitespace, dashes and parentheses and used to look up the
 * member's groups.
 *
 * @param {Element} member - the member row element.
 * @returns {Promise<{phone: ?string, name: ?string, status: ?string, image: ?string,
 *   groups: string, isSaved: string}>} `groups` and `isSaved` are JSON-encoded.
 */
async function handleMember(member) {
  const title = getTitle(member);
  const phoneCaption = getPhone(member);
  const status = getStatus(member);
  const image = getImage(member);
  let memberGroups = [];
  let isSaved = false;
  // If contact unsaved - the phone is the caption or the title.
  // If contact saved - the phone is unavailable.
  let phone = null;
  if (phoneCaption) {
    phone = phoneCaption;
  } else if (title && title.startsWith("+")) {
    // `title` is null for rows without a titled span; guard before calling startsWith.
    phone = title;
  }
  const name = phoneCaption || !phone ? title : null;
  if (name && !phone) {
    const contact = await getContact(name);
    phone = contact ? `+${contact.phoneNumber.split('@')[0]}` : null;
    isSaved = !!contact;
  }
  if (phone) {
    // Drop whitespace, dashes and parentheses in a single pass.
    phone = phone.replace(/[\s\-()]/g, '');
    memberGroups = await getGroups(phone);
  }
  return {
    phone,
    name,
    status,
    image,
    groups: JSON.stringify(memberGroups),
    isSaved: JSON.stringify(isSaved),
  };
}
/**
 * Returns the member's picture as a data URL.
 * @param {Element} member - the member row element.
 * @returns {?string} result of imageToDataURL for the row's first <img>, or null
 *   when the row contains no <img>.
 */
function getImage(member) {
  const picture = member.querySelector('img');
  return picture ? imageToDataURL(picture) : null;
}
/**
 * Reads the `title` of the first descendant matching `.copyable-text`.
 * NOTE(review): presumably this is the member's status/about line - confirm
 * against the current WhatsApp Web markup.
 * @param {Element} member - the member row element.
 * @returns {?string} that title, or null when no such descendant exists.
 */
function getStatus(member) {
  const node = member.querySelector('.copyable-text');
  if (!node) {
    return null;
  }
  return node.title;
}
/**
 * Reads the row caption: the text of the first span that has an empty
 * `aria-label` and no `title` attribute.
 *
 * The text is read through `textContent`, not `innerHTML`, so the caller gets
 * the characters themselves rather than serialized markup (`&amp;`, `&nbsp;`,
 * nested tags), which would otherwise end up in the exported phone number.
 *
 * @param {Element} member - the member row element.
 * @returns {?string} the caption text, or null when no such span exists.
 */
function getPhone(member) {
  const phone = member.querySelector('span[aria-label=""]:not(span[title])');
  return phone ? phone.textContent : null;
}
/**
 * Reads the `title` attribute of the first span in the row that has one.
 * @param {Element} member - the member row element.
 * @returns {?string} that title, or null when the row has no titled span.
 */
function getTitle(member) {
  const titled = member.querySelector('span[title]');
  if (!titled) {
    return null;
  }
  return titled.title;
}
/**
 * Lists the names of the groups a phone number participates in, according to IndexedDB.
 * @param {string} phone - phone number; converted to a key with whatsappDB.phoneToKey.
 * @returns {Promise<Array<?string>>} one entry per participant record, as returned by
 *   getGroupName; an empty array when CHECK_INDEXEDDB is disabled.
 */
async function getGroups(phone) {
  if (CHECK_INDEXEDDB) {
    const lookupKey = whatsappDB.phoneToKey(phone);
    const records = await whatsappDB.getParticipants(lookupKey);
    return records.map((record) => getGroupName(record.groupId));
  }
  return [];
}
/**
 * Resolves a group id to its subject using the preloaded `groups` array.
 * @param {*} groupID - id compared with strict equality against each group's `id`.
 * @returns {?string} the matching group's `subject`, or null when no group matches.
 */
function getGroupName(groupID) {
  const match = groups.find(({ id }) => id === groupID);
  if (!match) {
    return null;
  }
  return match.subject;
}
/**
 * Finds the preloaded contact whose `name` equals the given name.
 * @param {string} name - display name to look up.
 * @returns {?Object|undefined} the first matching contact, undefined when none
 *   matches, or null when CHECK_INDEXEDDB is disabled.
 */
function getContact(name) {
  const hasName = (candidate) => candidate.name === name;
  return CHECK_INDEXEDDB ? contacts.find(hasName) : null;
}
/**
 * Helper functions
 * @References [1] https://stackoverflow.com/questions/53158796/get-scroll-position-with-reactjs/53158893#53158893
 */
/**
 * Tells whether a scrollable element is scrolled to its bottom.
 *
 * `scrollTop` may be fractional while `scrollHeight` and `clientHeight` are
 * rounded, so an exact comparison with 0 can stay false forever; a 1px
 * tolerance is used instead.
 *
 * @param {{scrollHeight: number, clientHeight: number, scrollTop: number}} el
 * @returns {boolean} true when at most 1px remains to be scrolled.
 */
function scrollEndReached(el) {
  return Math.abs(el.scrollHeight - el.clientHeight - el.scrollTop) <= 1;
}
/**
 * Renders an image element onto an off-screen canvas and returns it as a PNG data URL.
 *
 * Side effect: sets `img.crossOrigin` to "anonymous" before drawing.
 *
 * @param {HTMLImageElement} img - image to encode.
 * @returns {string} the value of `canvas.toDataURL('image/png')`.
 */
function imageToDataURL(img) {
  img.crossOrigin = "anonymous";
  // Size the canvas after the image, preferring its natural dimensions.
  const surface = Object.assign(document.createElement('canvas'), {
    width: img.naturalWidth || img.width,
    height: img.naturalHeight || img.height,
  });
  // Paint the image at the canvas origin, then serialize the canvas.
  surface.getContext('2d').drawImage(img, 0, 0);
  return surface.toDataURL('image/png');
}
/**
 * Triggers a browser download of `data` under the given file name.
 *
 * Wraps the data in a Blob, points a hidden temporary <a download> element at
 * its object URL, clicks it, then revokes the URL and removes the element.
 *
 * @param {string} data - file contents.
 * @param {string} fileName - name offered for the downloaded file.
 */
function createDownloadLink(data, fileName) {
  const blob = new Blob([data], {
    type: "data:attachment/text"
  });
  const objectUrl = window.URL.createObjectURL(blob);

  const anchor = document.createElement('a');
  anchor.style.display = "none";
  anchor.setAttribute("href", objectUrl);
  anchor.setAttribute("download", fileName);

  // The anchor only needs to be in the document for the duration of the click.
  document.body.append(anchor);
  anchor.click();
  window.URL.revokeObjectURL(objectUrl);
  anchor.remove();
}
// https://stackoverflow.com/questions/11257062/converting-json-object-to-csv-format-in-javascript
/**
 * Serializes an array of flat objects to CSV text.
 *
 * The header row and the column order come from the keys of the first object;
 * every row is read by those keys, so rows stay aligned even if their own key
 * order differs. Values are converted with String(), wrapped in double quotes,
 * and embedded double quotes are escaped by doubling them (RFC 4180), so
 * JSON-encoded cells survive a round trip. null/undefined become an empty,
 * unquoted cell.
 *
 * @param {Array<Object>} arr - rows to export.
 * @returns {string} CSV text with "\n" line separators; '' for an empty or
 *   non-array input.
 */
function convertToCSV(arr) {
  if (!Array.isArray(arr) || arr.length === 0) {
    return '';
  }
  const columns = Object.keys(arr[0]);
  const toCell = (value) => {
    if (value == null) return '';
    return `"${String(value).replaceAll('"', '""')}"`;
  };
  const toLine = (cells) => cells.map(toCell).join(',');
  const lines = [toLine(columns)];
  for (const row of arr) {
    lines.push(toLine(columns.map((column) => row[column])));
  }
  return lines.join('\n');
}
// Entry point: runs as soon as the script is evaluated. A failure while starting
// is reported on the console instead of surfacing as an unhandled rejection.
start()
  .then(() => {
    console.log("Finished scraping.");
  })
  .catch((error) => {
    console.error("Scraping failed to start:", error);
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment