Last active
October 16, 2020 11:08
-
-
Save BuonOmo/c6c5e5b1859c4988083a85ff95f511e8 to your computer and use it in GitHub Desktop.
Retrieve every Heroku dataclips at once.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Go to the dataclip listing (https://data.heroku.com/dataclips). | |
// Then execute this script in your console. | |
// Be careful: this will focus a new window every 4 seconds, preventing you
// from working for 4 seconds times the number of dataclips you have.
// Build one { url, title } record per link matched in the dataclip listing.
// Kept as `let` so the list can be reassigned later on.
let dataclips = Array.from(
  document.querySelectorAll('.rt-td:first-child a'),
  link => ({ url: link.href, title: link.innerText })
)
/**
 * Allows waiting for a given delay before continuing execution.
 * @param {number} ms - delay in milliseconds (handed as is to setTimeout)
 * @returns {Promise<undefined>} a promise resolved, without any value, once
 *   the delay has elapsed
 */
const timeout = function(ms) {
  return new Promise(resolve => {
    // Wrap resolve so that the promise never receives an argument from the timer.
    setTimeout(() => resolve(), ms)
  })
}
// With a fast connection, 4 seconds is OK. Increase it if you get
// errors (pages that are not fully loaded yet cannot be scraped).
// Delay given to each dataclip page to load before it gets scraped.
const expectedLoadTime = 4000 // ms
// This is the main loop: windows are opened one by one to ensure focus and a
// correct loading time.
// The filter here is useful if the process was somehow interrupted, or if some
// dataclips failed: running the loop again only retries the dataclips that
// have no content yet.
for (const dataclip of dataclips.filter(dc => !dc.content)) {
  // This opens another window from the script, having access to its DOM.
  // See https://github.com/buonomo/kazoo for a funnier example usage!
  // And don't be shy to star and share :D
  const externWindow = window.open(dataclip.url)
  // window.open returns null when the browser blocks the popup. Every later
  // iteration would fail the same way, so stop here with a clear message.
  if (externWindow === null) {
    throw new Error(`Could not open ${dataclip.url}: allow popups for this page and run the script again`)
  }
  try {
    // A hack to wait for loading, this could be improved for sure.
    await timeout(expectedLoadTime)
    const externDocument = externWindow.document
    // Expand the query to have it all in the DOM.
    const expandButton = externDocument.querySelector('.hk-button-sm--tertiary')
    if (expandButton === null) {
      throw new Error('expand button not found, the page is probably not loaded yet')
    }
    expandButton.click()
    // Copy the query content and add it to the current dataclip.
    const queryView = externDocument.querySelectorAll('.view-lines')[1]
    if (queryView === undefined) {
      throw new Error('query editor not found, the page is probably not loaded yet')
    }
    dataclip.content = queryView.innerText
    // There are a lot of recurring timeouts on heroku dataclips, and some
    // outdated queries. This retrieves the error shown on the page, if any,
    // and adds its main message to the dataclip.
    const error = externDocument.querySelector('.bg-lightest-red')
    if (error !== null) {
      const errorText = error.innerText
      const match = errorText.match(/ERROR: +(.*)/)
      // Fall back to the whole text when it does not contain "ERROR: ...".
      dataclip.error = match === null ? errorText : match[1]
    }
  } catch (err) {
    // Do not abort the whole run for one page: the dataclip is left without
    // content, hence running the loop again will retry it.
    console.warn(`Could not scrape "${dataclip.title}" (${dataclip.url})`, err)
  } finally {
    // We don't need the window anymore, whether the scraping succeeded or not.
    externWindow.close()
  }
}
// Persist the scraped dataclips as JSON under the "scrappedDataclips" key so
// they can be read back later. Remember to delete that entry at some point, or
// skip this step entirely, since stored data might be subject to an exploit.
window.localStorage.setItem("scrappedDataclips", JSON.stringify(dataclips))
// Here's a way to do it all at once. However, the page load becomes unbearable,
// and since focus is not given to the pages, it seems like Chrome doesn't load
// some of them at all.
/* | |
const grabContent = ({ url, title }) => { | |
return new Promise(resolve => { | |
const externWindow = window.open(url) | |
setTimeout(() => { | |
externWindow.document.querySelector('.hk-button-sm--tertiary').click() | |
const content = externWindow.document.querySelectorAll('.view-lines')[1].innerText | |
externWindow.close() | |
resolve({ url, title, content }) | |
}, 20000); | |
}) | |
} | |
dataclips = await Promise.all(dataclips.map(grabContent)) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment