Last active
June 27, 2023 16:09
-
-
Save szhu/1bb6505e5deee6b5db4b1cdb5489ecf9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// domToTsv - Convert rows in a webpage into a TSV-formatted table.
//
// Example usage:
// This converts instacart.com order receipts to TSV. Each output row has the
// columns in the order the cell selectors are listed below.
domToTsv(
  // Select a single table:
  [`#store-wrapper main`],
  // For each table, select every row:
  `:scope > ul > li`,
  // Select each column individually:
  [
    // Select the quantity of the item (the <p> right after the element that
    // holds the "Item quantity:" label):
    `:scope :has([data-content="Item quantity:"]) + p`,
    // Select the name of the item (a <p> inside the element that directly
    // follows the product image):
    `:scope img + * > p`,
    // Select the price of the item (the <p> right after the element that
    // holds the "Current price:" label):
    `:scope :has([data-content="Current price:"]) + p`,
  ]
);
/**
 * Convert a webpage with rows into a TSV-formatted table. This is useful if you
 * want to extract tabular data to paste into Excel or Google Sheets.
 *
 * Every table becomes a group of lines (one line per row, cells separated by
 * tabs); groups are separated by a blank line. The result is logged to the
 * console and, when a global `copy` function is available (as in the browser
 * DevTools console), also passed to it.
 *
 * @param {Selectors<HTMLElement>} tableSelectors Resolved against
 *   `document.documentElement` to find the tables.
 * @param {Selectors<HTMLElement>} rowSelectors Resolved against each table to
 *   find its rows.
 * @param {Selectors<string>} cellSelectors Resolved against each row to find
 *   its cells.
 * @returns {string} The TSV text.
 */
function domToTsv(tableSelectors, rowSelectors, cellSelectors) {
  // Tabs and line breaks would be read as cell/row separators by the consumer,
  // so they are flattened to a single space. "\r\n" counts as one break.
  const cellBreaks = /\r\n|[\t\n\r]/g;

  // Turn whatever a cell selector produced into one line of plain text.
  function cellToText(cell) {
    // A selector that matched nothing yields an empty cell.
    if (cell == null) {
      return "";
    }
    let text;
    if (cell instanceof HTMLElement) {
      text = cell.innerText.trim();
    } else if (typeof cell === "object" && typeof cell.textContent === "string") {
      // Non-HTML elements (e.g. SVG nodes) have no `innerText`.
      text = cell.textContent.trim();
    } else {
      // Strings pass through unchanged; numbers etc. are stringified instead
      // of crashing on a missing `.replace` method.
      text = String(cell);
    }
    return text.replace(cellBreaks, " ");
  }

  const root = document.documentElement;
  const tables = getElementsBySelector(root, tableSelectors).map((table) =>
    getElementsBySelector(table, rowSelectors).map((row) =>
      getElementsBySelector(row, cellSelectors).map((cell) => cellToText(cell))
    )
  );

  const output = tables
    .map((rows) => rows.map((cells) => cells.join("\t")).join("\n"))
    .join("\n\n");

  // `typeof` on an undeclared identifier is safe, so this also works where no
  // `copy` helper (or no `window`) exists.
  if (typeof copy === "function") {
    copy(output);
  }
  console.log(output);
  return output;
}
/**
 * A single selector, resolved to exactly one value per scope. It is one of:
 * - a CSS selector string, resolved with `scope.querySelector`;
 * - an `HTMLElement`, used as-is;
 * - a function that receives the scope and returns the value;
 * - a `[cssSelector, format]` pair, where `format` receives the result of
 *   `scope.querySelector(cssSelector)` (`null` when nothing matches) and
 *   returns the value.
 *
 * @template {HTMLElement | string} T
 * @typedef {string | HTMLElement | ((scope: HTMLElement) => T) | [string, (el: Element | null) => T]} Selector
 */

/**
 * A way to produce a list of values from a scope. It is one of:
 * - a CSS selector string, resolved with `scope.querySelectorAll`;
 * - a function that receives the scope and returns the whole list;
 * - an array of `Selector`s, each contributing one entry.
 *
 * @template {HTMLElement | string} T
 * @typedef {string | ((scope: HTMLElement) => Iterable<T>) | Selector<T>[]} Selectors
 */

/**
 * A more flexible version of `querySelectorAll` that allows you to pass in
 * selectors as strings, functions, or arrays of strings and functions.
 *
 * A warning is logged (nothing is thrown) when an individual selector yields
 * a falsy value or when the overall result is empty.
 *
 * @template {HTMLElement | string} T
 * @param {HTMLElement} scope Element the selectors are resolved against.
 * @param {Selectors<T>} selectors
 * @returns {(T | null)[]} One entry per match (string or function form) or
 *   one entry per selector (array form; `null` where a CSS selector did not
 *   match).
 * @throws {TypeError} If `selectors`, or an entry of a selector array, has an
 *   unsupported type.
 */
function getElementsBySelector(scope, selectors) {
  function warn(value, selector) {
    if (!value) {
      console.warn("Selector returned nothing:", scope, selector);
    }
  }

  // Resolve one entry of a selector array to a single value.
  function resolveOne(selector) {
    if (typeof selector === "string") {
      return scope.querySelector(selector);
    }
    if (typeof selector === "function") {
      return selector(scope);
    }
    if (Array.isArray(selector)) {
      const [cssSelector, format] = selector;
      return format(scope.querySelector(cssSelector));
    }
    if (selector instanceof HTMLElement) {
      return selector;
    }
    // Report the offending entry, not the whole array it came from.
    throw new TypeError(`Invalid selector: ${String(selector)}`);
  }

  let results;
  if (typeof selectors === "string") {
    results = [...scope.querySelectorAll(selectors)];
  } else if (typeof selectors === "function") {
    // Callers use `.map` and `.length` on the result, so normalize anything
    // that is not already an array (NodeList, Set, nothing at all).
    const produced = selectors(scope);
    results = Array.isArray(produced) ? produced : Array.from(produced ?? []);
  } else if (Array.isArray(selectors)) {
    results = selectors.map((selector) => {
      const result = resolveOne(selector);
      warn(result, selector);
      return result;
    });
  } else {
    throw new TypeError(`Invalid selectors: ${String(selectors)}`);
  }

  warn(results.length > 0, selectors);
  return results;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment