Skip to content

Instantly share code, notes, and snippets.

@szhu
Last active November 9, 2022 01:49
Embed
What would you like to do?
// Example usage:
// Grab the receipt from an order page on instacart.com.
domToTsv(
  // Table selector(s).
  [".css-oqluv6"],
  // Selector for the rows inside each table.
  "li[class$=DeliveryItemDetails]",
  // One selector per output "column", in order.
  [
    ".css-coqxwd", // name "column"
    ".css-1a4cpf7, .css-1dv9j3w", // price "column"
  ],
);
/**
* @template {HTMLElement | string} T
* @typedef {string | ((root: HTMLElement) => T) | [string, (text: string) => string]} Selector
*/
/**
* @template {HTMLElement | string} T
* @typedef {string | ((root: HTMLElement) => T[]) | Selector<T>[]} Selectors
*/
/**
 * Convert tabular data in a webpage into a tab- and newline-separated table.
 * This is useful if you want to extract tabular data to paste into Excel or
 * Google Sheets.
 *
 * Every table resolved from `tableSelectors` is searched for rows, and every
 * row for cells. Cell text is trimmed and stripped of tabs and line breaks;
 * cells are joined with "\t", rows with "\n", and tables with a blank line.
 * A cell whose selector matches nothing becomes an empty cell so the remaining
 * columns stay aligned; unmatched tables and rows are skipped.
 *
 * The result is passed to `window.copy` when that is a function (presumably
 * the devtools console helper), logged with `console.log`, and returned.
 *
 * @param {Selectors<HTMLElement>} tableSelectors
 * @param {Selectors<HTMLElement>} rowSelectors
 * @param {Selectors<string>} cellSelectors
 * @returns {string} The generated TSV text.
 * @throws {Error} If a selector is not a string, function, array or element.
 */
function domToTsv(
  tableSelectors,
  rowSelectors,
  cellSelectors,
) {
  /**
   * Resolve `selectors` relative to `root`.
   *
   * - string: every match of `root.querySelectorAll`.
   * - function: whatever `selectors(root)` returns.
   * - array: one entry per item, in order. A string item yields the first
   *   match (or `null`), a function item its return value, a
   *   `[cssSelector, postFunc]` pair the post-processed `innerText` of the
   *   first match (or `null` when nothing matches), and an `HTMLElement`
   *   item itself.
   */
  function getElementsFromSelector(root, selectors) {
    if (selectors == null) {
      // Log the root being processed before the error below is thrown.
      console.log(root);
    }
    if (typeof selectors === "string") {
      return root.querySelectorAll(selectors);
    }
    if (typeof selectors === "function") {
      return selectors(root);
    }
    if (!Array.isArray(selectors)) {
      throw new Error(`Invalid selectors: ${selectors}`);
    }

    const elements = [];
    for (const selector of selectors) {
      if (typeof selector === "string") {
        // May be null; kept as a placeholder so column positions are stable.
        elements.push(root.querySelector(selector));
      } else if (typeof selector === "function") {
        elements.push(selector(root));
      } else if (Array.isArray(selector)) {
        const [cssSelector, postFunc] = selector;
        const match = root.querySelector(cssSelector);
        // Do not call postFunc when there is no element to read text from.
        elements.push(match == null ? null : postFunc(match.innerText));
      } else if (selector instanceof HTMLElement) {
        elements.push(selector);
      } else {
        // Report the offending item, not the whole array.
        throw new Error(`Invalid selector: ${selector}`);
      }
    }
    return elements;
  }

  const html = document.documentElement;
  const tables = getElementsFromSelector(html, tableSelectors);
  console.log("TABLES:", html, tableSelectors, tables);

  const tableValues = [];
  for (const table of tables) {
    if (table == null) {
      continue; // array selector that matched nothing
    }
    const rows = getElementsFromSelector(table, rowSelectors);
    console.log("ROWS:", table, rowSelectors, rows);

    const rowValues = [];
    for (const row of rows) {
      if (row == null) {
        continue; // array selector that matched nothing
      }
      const cells = getElementsFromSelector(row, cellSelectors);
      console.log("CELLS:", row, cellSelectors, cells);

      const cellValues = [];
      for (const cell of cells) {
        const raw = cell instanceof HTMLElement ? cell.innerText : cell;
        // Missing cells become "", other non-strings are stringified.
        cellValues.push(String(raw ?? "").trim());
      }
      rowValues.push(cellValues);
    }
    tableValues.push(rowValues);
  }

  // Tabs and line breaks inside a cell would corrupt the TSV structure.
  const sanitize = (value) => value.replace(/[\t\r\n]/g, "");
  const output = tableValues
    .map((rowValues) =>
      rowValues
        .map((cellValues) => cellValues.map(sanitize).join("\t"))
        .join("\n"),
    )
    .join("\n\n");

  if ("copy" in window && typeof window["copy"] === "function") {
    window["copy"](output);
  }
  console.log(output);
  return output;
}
// Edit on typescriptlang.org/play:
// https://www.typescriptlang.org/play?strict=false&filetype=js#code/PTAEFEA8EMFsAcA2BTUBXAztA5sgXAFAigAqAFgJYajYBO0ARtQC5mq3IDGyF8zoAM1oB7WKGgA7UMNoATZLWlSKEjM2idotZgDpOowrNElhJDADcAFAVC3QAbQAGejBgC0ARgCOlThPiOALoANHbEGMgonMwsbKDqDCg2do54GPrwqAB8oIgUjqF2tuGRXDGgIgDu1CrxjElF9slFtgBELu76XpCVsq2FJVHlrKgScKit+ohosBKtzS3tnK6e0AAsnPACAOz9oINlsajwtBTcoJPC07PzRYEEAJQERABULzYvoAACzMgIiNBfqAAN4ACRIAFkADLgFCwZASfgAH1AalOEmwAF9SB9vswAJ6ZeQCEFolTYUAoyyWETCZh4UDg6Gwv4I5gPUAAXhyJA5KPsZIxoUsv0g9NRzHR2A53IlUsC2IAyqVojIPsBXu9QJ8fn8kIDUGDITC4WzKXLydiSLifoTkMTSZLyebqbTxUyTazETKefZAnzQMqhjIADwkLJ+pUq5gyDDqzW4gDCwgk5gU-AS0wBilkgOgoFq+cqyAY8BwqBUMfEdQYbnEElkoAkyEqeWbdYiZfov0bmeQOlx5CoBeomGQAjQiALJPxwjQoEqkgzwlAyDF9GiNazWlAufU8RXZbUFcRK6g3CnMlAAHFhMJsChA2xkDEB9rQDau3AQUGyrGQx6LLwoiWTYn2v6qrQcbvl8X5iMCEExlBAHGkBbKgRUwiVIhsaflo34IdG-6CtgGEXogOFQfGE4SNEFApruximBY1i2OBRFQcEyRVJRGBcbY5G8VxHLAskNF0QxuDMGhiIYAAYiIsCUTSd7MKEETBlBIkLBQJKWBpf5QVynJNpOiDaS0oD6KoVz9og94qXSDwANwLJiOl6QSmTCCSBmQdQnKBRcJGtBZLQcMwaC0FIbo6F4aAKPilEAIKIIg+kcRgLluQskQRNOoAinaPmoplxkma04nMPRcxhUUEVRVIflIRgjnsq5RTuUUeUVnpyW0PQ+I6FQ-WDRlmlZXVdgoPwpTAeUJl+h1LQCFelgzaVmnSL5mVTUUumFV5yAlc1V6BRVIV7S0c1shgOjwJgZBtXFCW0ElHHjYZDzZZZthdb9PUFUV3k7Vt50XFVNWhSCCyWTdsn3Y9n2QW133LZZ-1w4g+UHZYo3QENI0DQTyNId9MO-dNL4OMsGCUaE8DCGocloLRgRcpthnowDpoIw9GBPYzzOs5wz3xYlym05RDzDRIza0CQa7sj9v2Y9d2O9YVp2KCoaiSNwJWAbz7IU5Tq7G3d-NPdrKsY7DdiA6JZvxGQVRNi2EADTIlitAAkqm0B5I22sMq0oAANSc-5tstGrf3PJZDXRebXoxNzauO-brBu82lSeyItA+-75iBxQweZaHEdRy1Mf-WryQbWQzCwFOJlGJwMxsjo7ed4iMnMMtG19gFNAvv38mKcpTct6E7ETSr1kYLZOj2dgPskMlABCMKKngezT4gs-1MgQlbsgWWD9TfYAGqBwlI9LWJa1D8f21n1lpu2BtVQj1J48KaIZSfZQg8V2tzb+WFb7THPhzR+RRF7L1Xj7AASgAeQAOq7z2MAzC2FMogKwhfBYq1FDrWpm7EqP8robXIlA++sDAjcy-tTciv8x4WwAUpD6VRQiCTAfbBBKAV4OVaImcAUIoRYIIZUXhkQKL4KsnIohlkSGFRoXIt+rCrq2FxuRAsqh1C0WOiSI2qdtF2D0SZcist5aKzFEwuwcc7C4yOiVSxQVWiXU-r9Sxii0o6CdLASwMdOr2wEnIuh59EYC0sOREJoAnFVEiZbJGtC77n1rgsG+6SUkxKSTklW7kxIi2qgxDg+puCWCEKIZByBcCQFnsIKaScpCQxTJYEu0CrotNAJ0hKOhykAkqdU2AtT6mNNAAAfkmRcUK6dkhFOYfwOc
zAHr8BMtk6Bd1YDQHgCpSoySuQ5FYnYfJWydA7L2WkrZRzCoCIiTki5uyaTIAqcgSwwAAA6zBgDShlgAK2ECoH23zQoLG+k8vZgyNDvK+RIX5ELAXAtaJ82qyREVAokCCiQqK5kLGSLjS48B8Rh1qJUFQRg84ADIqXxGKiSclDYsL2CJSS9m4NKolKhlNRllKWUZDZZYFZazCnJEEXZBywq0DtQIEU4gckrxrjgEgZAEy6CMBduwLgPA+CCEUvWaQcgFBKH0XrTQ2g9AGAIEYWAJgzBWAWE4DongfBnH8EEfiKQ0gZGyLkfInrbBNEpksFYXQeh9ADYsZ1Hh1ibB2P0BY9wHhAA
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment