Skip to content

Instantly share code, notes, and snippets.

@szhu
Last active June 27, 2023 16:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save szhu/1bb6505e5deee6b5db4b1cdb5489ecf9 to your computer and use it in GitHub Desktop.
Save szhu/1bb6505e5deee6b5db4b1cdb5489ecf9 to your computer and use it in GitHub Desktop.
// domToTsv — turn repeated rows of a web page into a TSV-formatted table.
//
// Usage example: extract the line items of an instacart.com order receipt.
// NOTE(review): the selectors depend on Instacart's markup; the original
// comments labelled column 2 "price" and column 3 "name", but the third
// selector targets the "Current price:" label — presumably column 2 is the
// item name and column 3 the price. Confirm against a live receipt page.
domToTsv(
  // Tables: a one-entry list, so only the first match is used as the table.
  ["#store-wrapper main"],
  // Rows: every <li> directly inside a <ul> that is a child of the table.
  ":scope > ul > li",
  // Cells: one selector per output column, in column order.
  [
    // Column 1: the <p> right after the element holding "Item quantity:".
    ':scope :has([data-content="Item quantity:"]) + p',
    // Column 2: a <p> child of the element right after the item image.
    ":scope img + * > p",
    // Column 3: the <p> right after the element holding "Current price:".
    ':scope :has([data-content="Current price:"]) + p',
  ]
);
/**
 * Convert a webpage with rows into a TSV-formatted table. This is useful if you
 * want to extract tabular data to paste into Excel or Google Sheets.
 *
 * Cells of a row are joined with tabs, rows of a table with newlines, and
 * tables with a blank line. Tabs, newlines and carriage returns inside a cell
 * are each replaced by a space so they cannot break the table layout.
 *
 * The result is written to the console and, when a `copy` function is
 * reachable through `window` (e.g. the console utility offered by browser
 * DevTools), also handed to it.
 *
 * @param {Selectors<HTMLElement>} tableSelectors
 * @param {Selectors<HTMLElement>} rowSelectors
 * @param {Selectors<string>} cellSelectors
 * @returns {string} The TSV text.
 */
function domToTsv(tableSelectors, rowSelectors, cellSelectors) {
  // Characters that a spreadsheet would read as a cell or row boundary.
  const separators = /[\t\n\r]/g;

  /**
   * Reduce one resolved cell to plain text.
   *
   * @param {unknown} cell
   * @returns {string}
   */
  function cellToText(cell) {
    if (cell == null) {
      // A selector that matched nothing produces an empty cell.
      return "";
    }
    if (typeof cell === "string") {
      return cell;
    }
    if (cell instanceof HTMLElement) {
      return cell.innerText.trim();
    }
    if (cell instanceof Element) {
      // Non-HTML elements (e.g. SVG) do not expose `innerText`.
      return cell.textContent.trim();
    }
    // Anything else (e.g. a number returned by a function selector).
    return String(cell);
  }

  const root = document.documentElement;
  const tables = getElementsBySelector(root, tableSelectors).map((table) =>
    getElementsBySelector(table, rowSelectors).map((row) =>
      getElementsBySelector(row, cellSelectors).map((cell) =>
        cellToText(cell).replace(separators, " ")
      )
    )
  );

  const output = tables
    .map((rows) => rows.map((cells) => cells.join("\t")).join("\n"))
    .join("\n\n");

  if ("copy" in window && typeof copy === "function") {
    copy(output);
  }
  console.log(output);
  return output;
}
/**
* @template {HTMLElement | string} T
* @typedef {T | string | ((scope: HTMLElement) => T) | [string, (el: T) => string]} Selector
*/
/**
* @template {HTMLElement | string} T
* @typedef {string | ((scope: HTMLElement) => T[]) | Selector<T>[]} Selectors
*/
/**
 * A more flexible version of `querySelectorAll` that allows you to pass in
 * selectors as strings, functions, or arrays of strings and functions.
 *
 * How `selectors` is interpreted:
 * - string: every match of `scope.querySelectorAll(selectors)`, as an array.
 * - function: whatever `selectors(scope)` returns.
 * - array: one result per entry, in order. Each entry may be
 *   - a string: the first match of `scope.querySelector(entry)`, or `null`;
 *   - a function: the value of `entry(scope)`;
 *   - a `[cssSelector, toString]` pair: `toString(element)` for the first
 *     match of `cssSelector`, or `null` (without calling `toString`) when
 *     nothing matches;
 *   - an `HTMLElement`: the element itself.
 *
 * A console warning is emitted for every falsy entry result and when the
 * overall result is empty.
 *
 * @template {HTMLElement | string} T
 * @param {HTMLElement} scope
 * @param {Selectors<T>} selectors
 * @returns {(T | null)[]}
 * @throws {TypeError} If `selectors`, or an entry of it, has an unsupported type.
 */
function getElementsBySelector(scope, selectors) {
  // Log a warning when a selector produced no usable value.
  function warnIfMissing(found, selector) {
    if (!found) {
      console.warn("Selector returned nothing:", scope, selector);
    }
  }

  // Resolve a single entry of an array-form `selectors` argument.
  function resolveEntry(selector) {
    if (typeof selector === "string") {
      return scope.querySelector(selector);
    }
    if (typeof selector === "function") {
      return selector(scope);
    }
    if (Array.isArray(selector)) {
      const [cssSelector, toString] = selector;
      const element = scope.querySelector(cssSelector);
      // The callback is typed to receive an element; do not hand it `null`.
      return element == null ? null : toString(element);
    }
    if (selector instanceof HTMLElement) {
      return selector;
    }
    // Report the offending entry, not the whole list.
    throw new TypeError(`Invalid selector: ${String(selector)}`);
  }

  let results;
  if (typeof selectors === "string") {
    results = [...scope.querySelectorAll(selectors)];
  } else if (typeof selectors === "function") {
    results = selectors(scope);
  } else if (Array.isArray(selectors)) {
    results = selectors.map((selector) => {
      const result = resolveEntry(selector);
      warnIfMissing(result, selector);
      return result;
    });
  } else {
    throw new TypeError(`Invalid selectors: ${String(selectors)}`);
  }

  warnIfMissing(results.length > 0, selectors);
  return results;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment