Skip to content

Instantly share code, notes, and snippets.

@e9x
Last active June 26, 2023 09:16
Show Gist options
  • Save e9x/7ae1fb15589aee3e3b5342cd71df8b43 to your computer and use it in GitHub Desktop.
Save e9x/7ae1fb15589aee3e3b5342cd71df8b43 to your computer and use it in GitHub Desktop.
Google Contacts Scraper

If the scrolling stops, try increasing the browser zoom level (Ctrl + +).

(async () => {
// CONSTANTS
// modify values accordingly
/**
 * Whether the script scrolls the list back to the top before it starts.
 */
const startFromTop = true;
// Text columns read from every contact row, in the order they are read.
// The avatar cell that precedes them in a row is handled separately.
const columns = [
  "Name",
  "Email",
  "Phone number",
  "Job title & company",
  "Labels",
];
/**
 * Position of the email column inside `columns`.
 * The email is the key used to prevent duplicate contacts.
 */
const emailColumn = columns.indexOf("Email");
// Header row of the export: the email column moved to the front (it is the
// index), the remaining columns in their original order, then the avatar
// under the name "Photo".
const newColumns = Array.from(columns);
newColumns.splice(emailColumn, 1);
newColumns.unshift(columns[emailColumn]);
newColumns.push("Photo");
/**
 * Rearranges one scraped row into the export layout: the avatar URL is
 * appended, then the value at `emailColumn` is moved to the front.
 *
 * @param {string[]} data Row values ordered like `columns`
 * @param {string} avatarURL
 * @returns {string[]} Row values ordered like `newColumns`
 */
function modifyData(data, avatarURL) {
  const reordered = data.concat([avatarURL]);
  const email = reordered[emailColumn];
  // take the email out of its original slot ...
  reordered.splice(emailColumn, 1);
  // ... and put it first
  reordered.unshift(email);
  return reordered;
}
/**
 * Scraped contacts; the key is the first entry of the stored row (the email).
 * @type {Map<string, string[]>}
 */
const contacts = new Map();
/**
 * The list element whose children are the contact rows: the first
 * `[data-list-type='10']` element that has an inline height set.
 * @type {HTMLDivElement|undefined}
 */
const contactsContainer = Array.from(
  document.querySelectorAll("[data-list-type='10']")
).find((candidate) => candidate.style.height !== "");
if (!contactsContainer) {
  throw new TypeError(
    "Failure finding the contactsContainer. Make sure you're on the list of directory contacts."
  );
}
// find the scroll container: the fifth ancestor of the contacts container
let ancestor = contactsContainer;
for (let hop = 0; hop < 5; hop += 1) {
  ancestor = ancestor.parentNode;
}
/**
 * @type {HTMLElement|undefined}
 */
const cWiz = ancestor;
if (cWiz?.nodeName !== "C-WIZ") {
  throw new TypeError("Failure finding the c-wiz element.");
}
if (startFromTop) {
  cWiz.scrollTo(0, 0);
  // give the contacts interface a second to clean up the rows further down
  // (possibly redundant now that only visible contacts are used)
  await sleep(1e3);
}
// Record every row that gets added to the container while scrolling.
const observer = new MutationObserver((mutations) => {
  for (const { addedNodes } of mutations) {
    for (const node of addedNodes) {
      const read = readContact(node);
      if (!read.loading) {
        // [0] is always email
        contacts.set(read.data[0], read.data);
      }
    }
  }
});
observer.observe(contactsContainer, { childList: true });
// cleared by complete() to stop the scroll loop
let doFrame = true;
// progress report every two seconds
const logInterval = setInterval(() => {
  console.log(`${contacts.size} contacts`);
}, 2e3);
/**
 * One tick of the auto-scroll loop. Re-arms itself every 80 ms for as long as
 * `doFrame` is set, scrolls the last already-loaded row to the bottom edge of
 * the view, and calls `complete()` once the scroll container cannot scroll
 * any further.
 */
function frame() {
  if (!doFrame) {
    return;
  }
  setTimeout(frame, 80);
  // Rows are scanned from the start and the scan stops at the first row that
  // is still loading; only the rows before it are safe scroll targets.
  const rows = visibleContacts();
  const firstLoadingIndex = rows.findIndex((row) => readContact(row).loading);
  const readableCount =
    firstLoadingIndex === -1 ? rows.length : firstLoadingIndex;
  /**
   * @type {Element | undefined}
   */
  const lastNode = rows[readableCount - 1];
  if (lastNode) {
    lastNode.scrollIntoView({
      block: "end",
      behavior: "auto",
      inline: "end",
    });
  }
  // nudge downwards to try to max out the scrolling
  cWiz.scrollBy(0, 20);
  if (isScrolledToBottom(cWiz)) {
    complete();
    return;
  }
  // not at the bottom yet: undo the nudge
  cWiz.scrollBy(0, -20);
}
// Start the auto-scroll loop on the next animation frame.
requestAnimationFrame(frame);
// Record the rows that are already rendered; rows added to the container
// afterwards are recorded by the MutationObserver.
fetchContacts();
/**
 * Creates a CSV with the column names at the top and one line per data row.
 *
 * Data fields are always wrapped in double quotes, and embedded double quotes
 * are doubled (`"` -> `""`) as CSV readers expect; JSON-style `\"` escapes are
 * not valid CSV. Missing values (`undefined`/`null`) become empty fields.
 * Column names are written as-is unless they contain a comma, quote or line
 * break, in which case they are quoted the same way.
 *
 * @param {string[]} columns
 * @param {string[][]} data
 * @returns {string}
 */
function createCSV(columns, data) {
  const quote = (text) => `"${String(text).replace(/"/g, '""')}"`;
  const dataField = (val) => (val == null ? "" : quote(val));
  const headerField = (name) =>
    /[",\r\n]/.test(String(name)) ? quote(name) : String(name);

  const header = `${columns.map(headerField).join(",")}\n`;
  const rows = data
    .map((rowData) => rowData.map(dataField).join(","))
    .join("\n");
  return header + rows;
}
/**
 * Offers the collected contacts as a `contacts.csv` download by clicking a
 * temporary anchor that points at an object URL for the generated CSV.
 */
function downloadContacts() {
  const csv = createCSV(newColumns, [...contacts.values()]);
  // Declare the content type so the browser treats the blob as CSV text.
  const url = URL.createObjectURL(
    new Blob([csv], { type: "text/csv;charset=utf-8" })
  );
  const anchor = document.createElement("a");
  anchor.target = "_blank";
  anchor.download = "contacts.csv";
  anchor.href = url;
  anchor.click();
  // Release the object URL on a later task instead of synchronously:
  // revoking right after click() can race with the browser starting the
  // download.
  setTimeout(() => URL.revokeObjectURL(url), 1e3);
}
/**
 * Finishes the scrape: stops the scroll loop, the mutation observer and the
 * progress logger, then exports the collected contacts.
 *
 * Everything is shut down before the export so that an error thrown by
 * `downloadContacts()` cannot leave the loop running and make `frame()` call
 * `complete()` again on every tick.
 */
function complete() {
  doFrame = false;
  observer.disconnect();
  clearInterval(logInterval);
  console.log("DONE");
  // logging the whole `contacts` map here was found to lag, so it is omitted
  downloadContacts();
}
/**
 * Records every contact row returned by `visibleContacts()` that has finished
 * loading. Rows are keyed by their email so a contact seen twice is stored
 * only once.
 */
function fetchContacts() {
  for (const node of visibleContacts()) {
    const read = readContact(node);
    if (read.loading) continue;
    // [0] is always email; readContact() returns no separate `email` field
    contacts.set(read.data[0], read.data);
  }
}
/**
 * Returns the children of the contacts container that pass the on-screen
 * check below.
 *
 * NOTE(review): the right-hand operand of the `||` is the scroll container's
 * bottom coordinate used as a bare truthy value, so whenever that coordinate
 * is non-zero every row passes the check. Confirm whether a comparison
 * against the row's position was intended before tightening it, since the
 * scroll loop consumes this list.
 *
 * @returns {Element[]}
 */
function visibleContacts() {
  const rows = Array.from(contactsContainer.children);
  return rows.filter((row) => {
    // negative Y means the row starts above the top of the viewport
    const { y } = row.getBoundingClientRect();
    return y > 0 || cWiz.getBoundingClientRect().bottom;
  });
}
/**
 * Reads one row of the contacts list.
 *
 * A row whose text is exactly ". . ." is reported as still loading. For any
 * other row, the image URL is taken from the second child of the first cell
 * and the text of the following cells is read positionally, one per entry in
 * `columns`; the result is passed through `modifyData`.
 *
 * @param {HTMLDivElement} row
 * @returns {{ loading: true; } | { loading: false; data: string[]; }}
 */
function readContact(row) {
  const isPlaceholder = row.textContent === ". . .";
  if (isPlaceholder) {
    return { loading: true };
  }
  const [cellsWrapper] = row.children;
  const cells = [...cellsWrapper.children];
  // cell 0 carries the avatar; the text columns start at cell 1
  const avatarURL = cells[0].children[1].src;
  const data = columns.map((_, index) => cells[index + 1]?.textContent);
  return {
    loading: false,
    data: modifyData(data, avatarURL),
  };
}
/**
 * Tells whether `container` has been scrolled all the way down.
 *
 * `scrollTop` can be fractional (for example at non-100% zoom or on HiDPI
 * screens) while `scrollHeight` and `clientHeight` are rounded integers, so
 * an exact comparison can stay false forever. A small tolerance absorbs that
 * rounding.
 *
 * @param {HTMLElement} container
 * @param {number} [tolerance=1] Remaining distance, in pixels, that still
 *   counts as "at the bottom"
 * @returns {boolean}
 */
function isScrolledToBottom(container, tolerance = 1) {
  const remaining =
    container.scrollHeight - container.clientHeight - container.scrollTop;
  return remaining <= tolerance;
}
/**
 * Promise-based delay.
 *
 * @param {number} ms How long to wait, in milliseconds
 * @returns {Promise<void>} Settles with no value once the delay has elapsed
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(() => {
      done();
    }, ms);
  });
}
/*setTimeout(() => {
complete();
}, 10e3);*/
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment