Skip to content

Instantly share code, notes, and snippets.

@novafacing
Last active November 28, 2023 19:51
Show Gist options
  • Save novafacing/f6f99f4c5884b55f5f535217103b8c3e to your computer and use it in GitHub Desktop.
Save novafacing/f6f99f4c5884b55f5f535217103b8c3e to your computer and use it in GitHub Desktop.
ago-downloader.js
const puppeteer = require("puppeteer");
const child_process = require("child_process");
const fs = require("fs");
/**
 * Run a command line through the system shell.
 *
 * The returned promise never rejects: on failure it resolves with the error
 * object handed back by `child_process.exec`, on success with the captured
 * output.
 *
 * @param {string} cmd - Command line passed verbatim to `child_process.exec`.
 * @returns {Promise<Error | {stdout: string, stderr: string}>} The exec error,
 *   or the captured stdout/stderr.
 */
async function sh(cmd) {
  return new Promise((settle) => {
    const onExit = (failure, stdout, stderr) => {
      // Best-effort: failures are delivered as the resolved value.
      settle(failure || { stdout, stderr });
    };
    child_process.exec(cmd, onExit);
  });
}
/**
 * Entry point: walk the AGO collection browse pages (0..LAST_PAGE), open every
 * "/collection/object/" link found on each page, read the Mirador manifest URI
 * from the object page, save the manifest JSON under output/, and run
 * dezoomify-rs to fetch the image when the target PNG does not exist yet.
 *
 * Errors on a single object or a single listing page are logged and skipped so
 * one bad page does not stop the crawl. The browser is always closed.
 */
(async () => {
  const BASE_URL = "https://ago.ca";
  const OUTPUT_DIR = "output";
  const LAST_PAGE = 71;

  /**
   * Run dezoomify-rs for one manifest. Uses execFile with an argument array so
   * the remotely supplied URL is never interpreted by a shell.
   * Never rejects: failures are logged and delivered as the resolved value so
   * one failed download does not abort the others.
   */
  const downloadImage = (manifestUrl, imagePath) =>
    new Promise((resolve) => {
      child_process.execFile(
        "dezoomify-rs",
        ["-l", "--compression", "100", manifestUrl, imagePath],
        (err, stdout, stderr) => {
          if (err) {
            console.error("Download failed: ", manifestUrl, err);
            resolve(err);
          } else {
            resolve({ stdout, stderr });
          }
        }
      );
    });

  /** True only when the path is confirmed absent (ENOENT); other stat errors count as "not missing". */
  const isMissing = (path) => {
    try {
      fs.statSync(path);
      return false;
    } catch (err) {
      return err.code === "ENOENT";
    }
  };

  // writeFileSync and dezoomify-rs both write into this directory.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    for (let i = 0; i <= LAST_PAGE; i++) {
      console.log("Page: ", i);
      // Downloads started for this listing page; awaited before moving on.
      const downloads = [];
      try {
        await page.goto(`${BASE_URL}/collection/browse?page=${i}`);
        const hrefs = await page.evaluate(() =>
          Array.from(document.querySelectorAll("a[href]"), (a) =>
            a.getAttribute("href")
          )
        );
        // De-duplicate: visiting the same object twice could start two
        // concurrent downloads writing to the same file.
        const links = [
          ...new Set(
            hrefs
              .filter((location) => location.startsWith("/collection/object/"))
              .map((location) => BASE_URL + location)
          ),
        ];
        for (const link of links) {
          try {
            await page.goto(link);
            // The click is expected to trigger a navigation; start waiting
            // before clicking so the navigation cannot be missed.
            await Promise.all([
              page.waitForNavigation(),
              page.click(".zoom-tool-toggle"),
            ]);
            const imglink = await page.evaluate(
              () => window.Mirador.viewer.data[0].manifestUri
            );
            await page.goto(imglink);
            const manifest = await page.evaluate(() =>
              JSON.parse(document.querySelector("body").innerText)
            );
            const jsonFilename = imglink.split("/").at(-1);
            const imgFilename = jsonFilename.replace(".json", ".png");
            fs.writeFileSync(
              `${OUTPUT_DIR}/${jsonFilename}`,
              JSON.stringify(manifest)
            );
            const imgPath = `${OUTPUT_DIR}/${imgFilename}`;
            // Checked synchronously so the download is registered in
            // `downloads` before Promise.all below reads the array.
            if (isMissing(imgPath)) {
              console.log("Downloading: ", imglink);
              downloads.push(downloadImage(imglink, imgPath));
            }
          } catch (error) {
            console.error(error);
          }
        }
      } catch (error) {
        console.error(error);
      }
      await Promise.all(downloads);
    }
  } finally {
    await browser.close();
  }
  console.log("Done...");
})().catch((error) => {
  // Startup or teardown failure (e.g. the browser could not be launched).
  console.error(error);
  process.exitCode = 1;
});
@novafacing
Copy link
Author

Install the dependency with npm i puppeteer. (To use npm i puppeteer-core instead, change the require to "puppeteer-core" and pass an executablePath argument to launch.)

@novafacing
Copy link
Author

And cargo install dezoomify-rs ;)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment