If the scrolling stops, try increasing the browser zoom level (Ctrl + +).
Last active
June 26, 2023 09:16
-
-
Save e9x/7ae1fb15589aee3e3b5342cd71df8b43 to your computer and use it in GitHub Desktop.
Google Contacts Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(async () => {
// CONSTANTS
// modify values accordingly

/**
 * Whether the script should scroll the list to the top before scraping.
 */
const startFromTop = true;

// Text columns of a contact row, in on-screen order.
// The avatar cell comes before these in every row and is read separately.
const columns = [
  "Name",
  "Email",
  "Phone number",
  "Job title & company",
  "Labels",
];

/**
 * Index of the email column inside `columns`.
 * The email is used as the key that prevents duplicate contacts.
 */
const emailColumn = columns.indexOf("Email");
// Without this guard an index of -1 would make the later splice/reorder
// silently drop the last column instead of failing.
if (emailColumn === -1)
  throw new Error('The "Email" column must be listed in `columns`.');

// Output columns: the email first (it is the index), then the remaining
// columns in their original order, then the avatar URL.
const newColumns = [
  columns[emailColumn],
  ...columns.filter((_, index) => index !== emailColumn),
  "Photo",
];
/**
 * Reorders one scraped row so that it lines up with `newColumns`.
 *
 * @param {string[]} data Cell values ordered like `columns`. Not modified.
 * @param {string} avatarURL URL of the contact's avatar image.
 * @returns {string[]} New array: the email first, the other cells in their
 * original order, and the avatar URL last.
 */
function modifyData(data, avatarURL) {
  const rest = [...data];
  // splice returns the removed cells; pull the single email value out of the copy
  const [email] = rest.splice(emailColumn, 1);
  return [email, ...rest, avatarURL];
}
/**
 * Scraped rows keyed by email, so a contact seen more than once is stored once.
 * @type {Map<string, string[]>}
 */
const contacts = new Map();

/**
 * Element whose children are the contact rows: among the
 * `[data-list-type='10']` matches, the first one with an inline height.
 * @type {HTMLDivElement|undefined}
 */
const contactsContainer = [
  ...document.querySelectorAll("[data-list-type='10']"),
].find((e) => e.style.height !== "");

if (!contactsContainer)
  throw new TypeError(
    "Failure finding the contactsContainer. Make sure you're on the list of directory contacts."
  );

// find the scroll container
/**
 * Fifth ancestor of the contacts container; it must be a `<c-wiz>` element.
 * Optional chaining lets a shallower DOM reach the descriptive error below
 * instead of failing on a property access of `null`.
 * @type {HTMLElement|undefined}
 */
const cWiz =
  contactsContainer.parentNode?.parentNode?.parentNode?.parentNode?.parentNode;

if (cWiz?.nodeName !== "C-WIZ")
  throw new TypeError("Failure finding the c-wiz element.");

if (startFromTop) {
  cWiz.scrollTo(0, 0);
  // wait for the contacts interface to cleanup the further down contacts
  // this might be redundant now that only visible contacts are used:
  await sleep(1e3);
}

// Record every row that gets appended to the list while it is being scrolled.
const observer = new MutationObserver((mutations) => {
  for (const mutation of mutations) {
    for (const node of mutation.addedNodes) {
      const read = readContact(node);
      if (read.loading) continue;
      // [0] is always email
      contacts.set(read.data[0], read.data);
    }
  }
});
observer.observe(contactsContainer, {
  childList: true,
});

// Set to false by complete() to stop the frame() loop.
let doFrame = true;

// Progress report every two seconds; cleared by complete().
const logInterval = setInterval(() => {
  console.log(`${contacts.size} contacts`);
}, 2e3);
/**
 * One step of the auto-scroll loop; re-schedules itself every 80 ms while
 * `doFrame` is true.
 *
 * Each step scrolls the last already-loaded row into view, then nudges the
 * scroll container down by 20px to test whether the bottom has been reached.
 * At the bottom it calls `complete()`; otherwise the nudge is undone.
 */
function frame() {
  if (!doFrame) return;
  // Queue the next step up front; it returns immediately once doFrame is cleared.
  setTimeout(frame, 80);

  // We can't scroll if there's loading elements
  // But we can scroll from the start if the loading elements are only at the end
  // As soon as we encounter a loading contact, we stop scrolling
  /**
   * @type {Element | undefined}
   */
  let lastNode;
  for (const node of visibleContacts()) {
    if (readContact(node).loading) break;
    lastNode = node;
  }

  lastNode?.scrollIntoView({
    block: "end",
    behavior: "auto",
    inline: "end",
  });

  // try to max out the scrolling
  cWiz.scrollBy(0, 20);
  if (isScrolledToBottom(cWiz)) {
    complete();
  } else {
    // put it back
    cWiz.scrollBy(0, -20);
  }
}
// Start the scroll loop on the next animation frame, then record the rows
// that are already in the list (the observer only reports rows added later).
requestAnimationFrame(frame);
fetchContacts();
/**
 * Creates a CSV with the column names at the top and one line per data row.
 *
 * Cell values are always wrapped in double quotes, with embedded double
 * quotes doubled (the CSV convention). Missing cells (`null`/`undefined`)
 * become empty fields. Column names are quoted only when they contain a
 * comma, a double quote or a line break.
 *
 * @param {string[]} columns
 * @param {string[][]} data
 * @returns {string}
 */
function createCSV(columns, data) {
  // CSV readers expect "" for a literal quote; JSON-style \" escapes are not understood.
  const quote = (text) => `"${String(text).replace(/"/g, '""')}"`;
  const headerCell = (name) => (/[",\r\n]/.test(name) ? quote(name) : name);
  const dataCell = (val) => (val == null ? "" : quote(val));

  const header = `${columns.map(headerCell).join(",")}\n`;
  const rows = data
    .map((rowData) => rowData.map(dataCell).join(","))
    .join("\n");
  return header + rows;
}
/**
 * Offers every collected contact as a `contacts.csv` download by clicking a
 * temporary anchor that points at an object URL for the CSV text.
 */
function downloadContacts() {
  const csv = createCSV(newColumns, [...contacts.values()]);
  const url = URL.createObjectURL(
    new Blob([csv], { type: "text/csv;charset=utf-8" })
  );

  const anchor = document.createElement("a");
  anchor.target = "_blank";
  anchor.download = "contacts.csv";
  anchor.href = url;
  try {
    anchor.click();
  } finally {
    // Release the exact URL that was created, even if the click fails.
    URL.revokeObjectURL(url);
  }
}
/**
 * Ends the scrape: stops the scroll loop, the mutation observer and the
 * progress logger, then downloads the collected contacts.
 */
function complete() {
  console.log("DONE");
  // Stop everything before exporting, so a failing download cannot leave the
  // frame loop running and calling complete() again on every step.
  doFrame = false;
  observer.disconnect();
  clearInterval(logInterval);
  // (Logging the whole `contacts` map here was found to lag the page.)
  downloadContacts();
}
/**
 * Records every row currently returned by `visibleContacts()` that has
 * finished loading, keyed by its email.
 */
function fetchContacts() {
  for (const node of visibleContacts()) {
    const read = readContact(node);
    if (read.loading) continue;
    // readContact() returns no `email` property; the email is data[0].
    contacts.set(read.data[0], read.data);
  }
}
/**
 * Get the children of the contacts container that pass the visibility test.
 *
 * NOTE(review): the test is `rect.y > 0 || <bottom of cWiz>`. The right-hand
 * side is a number, so every row passes whenever the scroll container's
 * bottom edge is non-zero. This looks like it was meant to be a range check;
 * confirm on the live page before tightening it, because frame() picks the
 * row it scrolls to from this list.
 *
 * @returns {Element[]}
 */
function visibleContacts() {
  // negative Y means it's outside the container
  return [...contactsContainer.children].filter(
    (row) =>
      row.getBoundingClientRect().y > 0 || cWiz.getBoundingClientRect().bottom
  );
}
/**
 * Reads one row of the contacts list.
 *
 * A row whose text is exactly ". . ." is treated as a placeholder that is
 * still loading. Otherwise the row's first child holds the cells: the first
 * cell is the avatar cell and the following cells are read, in order, as the
 * values for `columns`. Cells that are missing yield `undefined`.
 *
 * @param {HTMLDivElement} row
 * @returns {{ loading: true; } | { loading: false; data: string[]; }}
 * `data` is ordered like `newColumns` (see `modifyData`).
 */
function readContact(row) {
  if (row.textContent === ". . .") return { loading: true };

  const [rowsContainer] = row.children;
  const [avatarElement, ...rowElements] = rowsContainer.children;
  // The image is the avatar cell's second child (the original note says the
  // first child is the checkbox). A row without an image gets an undefined
  // URL instead of aborting the whole read.
  const avatarURL = avatarElement.children[1]?.src;
  const data = columns.map((_, i) => rowElements[i]?.textContent);

  return {
    data: modifyData(data, avatarURL),
    loading: false,
  };
}
/**
 * Tells whether a scroll container has been scrolled all the way down.
 *
 * `scrollTop` may be fractional (page zoom, high-DPI displays) while
 * `scrollHeight` and `clientHeight` are rounded, so the remaining distance is
 * compared with a 1px tolerance instead of requiring an exact match.
 *
 * @param {HTMLElement} container
 * @returns {boolean}
 */
function isScrolledToBottom(container) {
  const remaining =
    container.scrollHeight - container.scrollTop - container.clientHeight;
  return remaining <= 1;
}
/**
 * Waits for the given time.
 *
 * @param {number} ms Delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the delay has passed.
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/*setTimeout(() => {
complete();
}, 10e3);*/
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment