Created
July 10, 2024 07:31
-
-
Save YuviGold/850b7e5cab4b7e6264be591e184a7254 to your computer and use it in GitHub Desktop.
Whatsapp Group Exporter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Inspired by https://gist.github.com/shaneapen/3406477b9f946855d02e3f33ec121975
The script scrapes the members of a WhatsApp group chat and exports the data to a CSV file.
It scrolls automatically and extracts each list item in the members list with all the information available.
Then it joins this information with the IndexedDB data to get the groups each member is in and
whether the contact is already saved in the phone.
Steps:
1. Open WhatsApp Web
2. Open the group chat you want to scrape ->
   Click on the group name to open the group info ->
   Click on the members list
3. Open the browser console (F12)
4. Copy and paste the code below into the console and press Enter
After the script has finished running, the CSV file containing the scraped data is downloaded automatically.
*/
// Delay, in milliseconds, between two automatic scroll steps (passed to setInterval).
const SCROLL_INTERVAL = 1000;
// Amount added to the list's scrollTop on every automatic scroll step.
const SCROLL_INCREMENT = 450;
// When true, the members list is scrolled automatically until its end is reached.
const AUTO_SCROLL = true;
// When true, scraped members are enriched with data read from IndexedDB.
const CHECK_INDEXEDDB = true;

// Shared mutable state of the current scrape run.
var scrollInterval,
    observer,
    membersList,
    header,
    MEMBERS_QUEUE;
/**
 * Promise-based, read-only wrapper around the `model-storage` IndexedDB database.
 *
 * It exposes the `group-metadata`, `participant` and `contact` object stores
 * that the scraper joins with the data read from the page.
 */
class WhatsappDB {
    // Holds the (pending or resolved) promise of the open database connection.
    #db;
    #dbName = "model-storage";
    #groupsCollection = "group-metadata";
    #contactsCollection = "contact";
    #phoneNumberIndex = "phoneNumber";
    #participantsCollection = "participant";
    #participantsIndex = "participants";

    /**
     * Opens the database on first use and reuses the connection afterwards.
     *
     * The pending promise itself is cached, so concurrent callers share a single
     * `indexedDB.open` request instead of each opening their own connection.
     * A failed attempt is not cached: the next call tries again.
     *
     * @returns {Promise<IDBDatabase>} the open database connection
     */
    async openConnection() {
        if (!this.#db) {
            this.#db = this.#open().catch((error) => {
                this.#db = undefined;
                throw error;
            });
        }
        return this.#db;
    }

    /**
     * Adapts the callback-based `indexedDB.open` request to a promise.
     *
     * @returns {Promise<IDBDatabase>} rejects with the request's error on failure
     */
    #open() {
        const dbName = this.#dbName;
        return new Promise((resolve, reject) => {
            const request = indexedDB.open(dbName);
            request.onerror = () => {
                // Reject with a real error object rather than the raw event.
                reject(request.error ?? new Error(`Failed to open IndexedDB database "${dbName}"`));
            };
            request.onsuccess = () => {
                resolve(request.result);
            };
        });
    }

    /**
     * Runs a read-only `getAll` against an object store, or one of its indexes.
     *
     * @param {string} collection - object store name
     * @param {string} [index] - index name; when omitted the store itself is queried
     * @param {*} [query] - key or key range forwarded to `getAll`
     * @param {number} [count] - maximum number of records forwarded to `getAll`
     * @returns {Promise<Array>} the matching records
     */
    async #promisifyCol(collection, index, query, count) {
        const db = await this.openConnection();
        return new Promise((resolve, reject) => {
            const transaction = db.transaction(collection, "readonly");
            const objectStore = transaction.objectStore(collection);
            const source = index ? objectStore.index(index) : objectStore;
            const request = source.getAll(query, count);
            request.onerror = () => {
                reject(request.error ?? new Error(`Failed to read "${collection}" from IndexedDB`));
            };
            request.onsuccess = () => {
                resolve(request.result);
            };
        });
    }

    /** @returns {Promise<Array>} every record of the groups store */
    async getGroups() {
        return this.#promisifyCol(this.#groupsCollection);
    }

    /**
     * @param {*} [key] - key looked up in the `participants` index
     * @returns {Promise<Array>} the matching records of the participants store
     */
    async getParticipants(key) {
        return this.#promisifyCol(this.#participantsCollection, this.#participantsIndex, key);
    }

    /**
     * @param {*} [key] - key looked up in the `phoneNumber` index; all indexed contacts when omitted
     * @returns {Promise<Array>} the matching records of the contacts store
     */
    async getContacts(key) {
        return this.#promisifyCol(this.#contactsCollection, this.#phoneNumberIndex, key);
    }

    /**
     * Builds the database key for a phone number: drops the first "+" and appends "@c.us".
     *
     * @param {string} phone - phone number, e.g. "+15551234567"
     * @returns {string} e.g. "15551234567@c.us"
     */
    phoneToKey(phone) {
        return `${phone.replace('+', '')}@c.us`;
    }
}
// Shared database accessor. Declared explicitly: assigning to an undeclared
// name creates an implicit global and throws in strict mode / ES modules.
var whatsappDB = new WhatsappDB();
// Records loaded from IndexedDB by start() when CHECK_INDEXEDDB is enabled.
var groups, contacts;
// Fall back to the vendor-prefixed constructor where the standard one is missing.
window.MutationObserver = window.MutationObserver || window.WebKitMutationObserver;
/**
 * Performs one auto-scroll step on the element two siblings after the header:
 * scrolls it down by SCROLL_INCREMENT, or stops the scrape once its end is reached.
 */
const autoScroll = () => {
    const scrollPane = header.nextSibling.nextSibling;
    if (scrollEndReached(scrollPane)) {
        stop();
        return;
    }
    scrollPane.scrollTop += SCROLL_INCREMENT;
};
/**
 * Starts a scrape run.
 *
 * Resets the collected members, optionally loads groups and contacts from
 * IndexedDB, attaches a MutationObserver to the parent of the first <header>
 * element, scrolls the list to the top, scrapes the currently rendered members
 * and, when AUTO_SCROLL is enabled, schedules the periodic auto-scroll.
 *
 * The returned promise resolves once scraping has been set up, not when it is
 * complete; the run ends when stop() is called.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the page contains no <header> element
 */
async function start() {
    MEMBERS_QUEUE = {};
    if (CHECK_INDEXEDDB) {
        groups = await whatsappDB.getGroups();
        contacts = await whatsappDB.getContacts();
    }
    header = document.getElementsByTagName('header')[0];
    if (!header) {
        // Fail with an actionable message instead of an opaque TypeError below.
        throw new Error('No <header> element found. Open the group members list before running the script.');
    }
    membersList = header.parentNode;
    // Re-scrape whenever the list changes (new rows are rendered while scrolling).
    observer = new MutationObserver(() => {
        scrapeData().catch((error) => {
            // The observer callback cannot propagate a rejection; report it here.
            console.error('Failed to scrape members:', error);
        });
    });
    // the div to watch for mutations
    observer.observe(membersList, {
        childList: true,
        subtree: true,
    });
    // scroll to top before beginning
    header.nextSibling.nextSibling.scrollTop = 0;
    await scrapeData();
    if (AUTO_SCROLL) {
        scrollInterval = setInterval(autoScroll, SCROLL_INTERVAL);
    }
}
/**
 * Stops the current scrape instance:
 * cancels the auto-scroll timer, disconnects the mutation observer,
 * prints the collected members and triggers the CSV download.
 */
const stop = () => {
    window.clearInterval(scrollInterval);
    observer.disconnect();

    const scrapedCount = Object.keys(MEMBERS_QUEUE).length;
    console.table(MEMBERS_QUEUE);
    console.log(`Scrape stopped. ${scrapedCount} members scraped.`);

    const csv = convertToCSV(Object.values(MEMBERS_QUEUE));
    createDownloadLink(csv, "whatsapp_members.csv");
};
/**
 * Scrapes every member row currently rendered in the members list and stores
 * it in MEMBERS_QUEUE, keyed by phone number when available and by name otherwise.
 * The entry named "You" is skipped.
 *
 * @returns {Promise<void>}
 */
async function scrapeData() {
    const rows = membersList.querySelectorAll('[role=listitem] > [role=button]');
    for (const row of rows) {
        const details = await handleMember(row);
        if (details.name !== "You") {
            const key = details.phone ? details.phone : details.name;
            MEMBERS_QUEUE[key] = details;
        }
    }
}
/**
 * Extracts everything known about a single member row.
 *
 * The phone number is taken from the row's caption, or from the title when the
 * title starts with "+". When only a name is available, the contact is looked
 * up by name to recover the phone number. A resolved phone number is stripped
 * of whitespace, dashes and parentheses and used to look up the member's groups.
 *
 * @param {Element} member - the member row element
 * @returns {Promise<{phone: ?string, name: ?string, status: ?string, image: ?string, groups: string, isSaved: string}>}
 *          `groups` and `isSaved` are JSON-encoded strings
 */
async function handleMember(member) {
    const title = getTitle(member);
    const phoneCaption = getPhone(member);
    const status = getStatus(member);
    const image = getImage(member);
    let memberGroups = [];
    let isSaved = false;

    // If contact unsaved - the phone is the caption or the title.
    // If contact saved - the phone is unavailable.
    // `title` may be null when the row has no titled span, hence the optional call.
    let phone = phoneCaption || (title?.startsWith("+") ? title : null);
    // The title is a name unless it was itself used as the phone number.
    const name = (phoneCaption || !phone) ? title : null;

    if (name && !phone) {
        const contact = await getContact(name);
        phone = contact?.phoneNumber ? `+${contact.phoneNumber.split('@')[0]}` : null;
        isSaved = !!contact;
    }

    if (phone) {
        // Normalize e.g. "+1 (555) 123-4567" to "+15551234567".
        phone = phone.replace(/[\s\-()]/g, '');
        memberGroups = await getGroups(phone);
    }

    return {
        phone,
        name,
        status,
        image,
        groups: JSON.stringify(memberGroups),
        isSaved: JSON.stringify(isSaved),
    };
}
/**
 * Returns the member's picture as a data URL, or null when the row has no <img>.
 *
 * @param {Element} member - the member row element
 * @returns {?string}
 */
function getImage(member) {
    const picture = member.querySelector('img');
    return picture ? imageToDataURL(picture) : null;
}
/**
 * Reads the `title` of the row's `.copyable-text` element.
 *
 * @param {Element} member - the member row element
 * @returns {?string} the title, or null when no such element exists
 */
function getStatus(member) {
    const statusNode = member.querySelector('.copyable-text');
    if (!statusNode) {
        return null;
    }
    return statusNode.title;
}
/**
 * Reads the row's caption: the text of the span that has an empty `aria-label`
 * and no `title` attribute.
 *
 * The text content is returned rather than the inner HTML, so HTML entities
 * (e.g. "&amp;") and nested markup never end up in the exported data.
 *
 * @param {Element} member - the member row element
 * @returns {?string} the caption text, or null when no such span exists
 */
function getPhone(member) {
    const phone = member.querySelector('span[aria-label=""]:not(span[title])');
    return phone ? phone.textContent : null;
}
/**
 * Reads the `title` attribute of the first titled span in the row.
 *
 * @param {Element} member - the member row element
 * @returns {?string} the title, or null when the row has no span with a title
 */
function getTitle(member) {
    const titledSpan = member.querySelector('span[title]');
    if (titledSpan) {
        return titledSpan.title;
    }
    return null;
}
/**
 * Looks up the names of the groups a phone number participates in.
 *
 * @param {string} phone - normalized phone number
 * @returns {Promise<Array<?string>>} one entry per participant record (null for
 *          an unknown group); an empty array when CHECK_INDEXEDDB is disabled
 */
async function getGroups(phone) {
    if (CHECK_INDEXEDDB) {
        const participantKey = whatsappDB.phoneToKey(phone);
        const records = await whatsappDB.getParticipants(participantKey);
        return records.map((record) => getGroupName(record.groupId));
    }
    return [];
}
/**
 * Resolves a group id to its subject using the loaded `groups` records.
 *
 * @param {*} groupID - id compared strictly against each record's `id`
 * @returns {?string} the group's subject, or null when no record matches
 */
function getGroupName(groupID) {
    const match = groups.find((candidate) => candidate.id === groupID);
    if (!match) {
        return null;
    }
    return match.subject;
}
/**
 * Finds the loaded contact record whose `name` equals the given name.
 *
 * @param {string} name - contact name to look for
 * @returns {?Object|undefined} the contact record, undefined when none matches,
 *          or null when CHECK_INDEXEDDB is disabled
 */
function getContact(name) {
    return CHECK_INDEXEDDB
        ? contacts.find((entry) => entry.name === name)
        : null;
}
/**
 * Helper functions
 * @References [1] https://stackoverflow.com/questions/53158796/get-scroll-position-with-reactjs/53158893#53158893
 */

/**
 * Tells whether a scrollable element has been scrolled all the way down.
 *
 * `scrollTop` can be fractional (e.g. with page zoom or display scaling) while
 * `scrollHeight` and `clientHeight` are rounded, so the remaining distance is
 * compared with a one-pixel tolerance instead of strict equality with 0.
 *
 * @param {{scrollHeight: number, clientHeight: number, scrollTop: number}} el
 * @returns {boolean} true when the bottom of the element is reached
 */
function scrollEndReached(el) {
    return Math.abs(el.scrollHeight - el.clientHeight - el.scrollTop) <= 1;
}
/**
 * Converts an image element to a PNG data URL by drawing it onto a canvas.
 *
 * Exporting is best effort: drawing or reading back the canvas can throw
 * (for instance when the canvas is tainted by a cross-origin image). Because
 * the picture is optional data, such a failure is logged and null is returned
 * instead of aborting the whole scrape.
 *
 * @param {HTMLImageElement} img - the image to export
 * @returns {?string} the PNG data URL, or null when the image cannot be exported
 */
function imageToDataURL(img) {
    img.crossOrigin = "anonymous";
    // Create a canvas element sized like the image
    const canvas = document.createElement('canvas');
    canvas.width = img.naturalWidth || img.width;
    canvas.height = img.naturalHeight || img.height;
    try {
        // Draw the image onto the canvas
        const ctx = canvas.getContext('2d');
        ctx.drawImage(img, 0, 0);
        // Get the Data URI of the image
        return canvas.toDataURL('image/png');
    } catch (error) {
        console.warn('Could not export member image:', error);
        return null;
    }
}
/**
 * Downloads the given text as a file.
 *
 * A hidden anchor pointing at a temporary object URL is added to the page,
 * clicked programmatically and removed again; nothing is left in the page.
 *
 * @param {string} data - file content (CSV text)
 * @param {string} fileName - name suggested for the downloaded file
 */
function createDownloadLink(data, fileName) {
    const a = document.createElement('a');
    a.style.display = "none";
    // Use a valid MIME type for the content.
    const blob = new Blob([data], {
        type: "text/csv;charset=utf-8",
    });
    const url = window.URL.createObjectURL(blob);
    a.setAttribute("href", url);
    a.setAttribute("download", fileName);
    document.body.append(a);
    a.click();
    window.URL.revokeObjectURL(url);
    a.remove();
}
// https://stackoverflow.com/questions/11257062/converting-json-object-to-csv-format-in-javascript
/**
 * Serializes an array of flat objects to CSV text.
 *
 * The first line holds the keys of the first object; each following line holds
 * the values of one object. Every non-null value is converted to a string and
 * wrapped in double quotes, with embedded double quotes escaped by doubling
 * them, so the original text survives a round trip through a CSV parser.
 * null and undefined become empty, unquoted fields.
 *
 * @param {Array<Object>} arr - the rows to export
 * @returns {string} the CSV text; an empty string when there are no rows
 */
function convertToCSV(arr) {
    if (!arr || arr.length === 0) {
        return '';
    }
    const columns = Object.keys(arr[0]);
    const rows = [columns, ...arr.map((item) => Object.values(item))];
    const toCell = (value) => {
        if (value == null) {
            return '';
        }
        return `"${String(value).replaceAll('"', '""')}"`;
    };
    return rows
        .map((row) => row.map(toCell).join(','))
        .join('\n');
}
start().then(r => { | |
console.log("Finished scraping.") | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment