Skip to content

Instantly share code, notes, and snippets.

@betsalel-williamson
Last active December 16, 2022 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save betsalel-williamson/1ad27f465b93ab82374c57a48da096c4 to your computer and use it in GitHub Desktop.
Save betsalel-williamson/1ad27f465b93ab82374c57a48da096c4 to your computer and use it in GitHub Desktop.
Concat Images from Site
#!/usr/bin/env sh
# Shorthand to minify Scrape_images.js into a single line and copy it to the
# clipboard:
#   1. tr deletes every newline;
#   2. the first sed expression deletes /* ... */ comments whose text contains
#      no asterisk, the second collapses runs of two or more spaces into one;
#   3. the result is piped into pbcopy.
# Because all newlines are removed, the JavaScript source has to use /* */
# comments (a // comment would swallow the rest of the joined line).
tr -d '\n' < Scrape_images.js | sed -E -e 's|\/\*[^*]*\*\/||g' -e 's/[ ]{2,}/ /g' | pbcopy
/* Copyright 2022 Betsalel Williamson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. */
javascript:var PAGE_RANGE = [];
/* PAGE_RANGE examples: all pages = [], selected pages [1,27], from start page to end [3] */
/* Global Constants */
/* Pause in milliseconds between two polls while waiting for a page canvas. */
var TIME_BEFORE_PAGE_DOWNLOADS_MS = 25;
/* Attribute that carries the page number on the elements clicked to open a page. */
var PAGE_NUMBER_ATTRIBUTE_NAME = "position";
/* Attribute that carries the page number on the element whose parent holds the page canvas. */
var CANVAS_PAGE_NUMBER_ATTRIBUTE_NAME = "page";
/* Image type handed to canvas.toDataURL. */
var IMAGE_TYPE = "image/png";
/* Value of the per-page attempt counter at which the page is given up on. */
var MAX_ATTEMPTS = 400;
/* Returns a promise that settles, without a value, when a setTimeout of ms milliseconds fires. */
async function waitMs(ms) {
  const timerElapsed = new Promise(function (done) {
    setTimeout(done, ms);
  });
  return timerElapsed;
}
/* Scrolls the page navigation list from top to bottom in fixed steps and
records the page number of every element carrying the
PAGE_NUMBER_ATTRIBUTE_NAME attribute that is present after each step.
Presumably (going by the vue-recycle-scroller class name) the list only keeps
entries near the current scroll offset in the DOM, which is why it has to be
stepped through - TODO confirm.
Both scroll containers are reset to the top before and after the scan.
Returns [lowestPage, highestPage]; when no page element was seen this is
[Infinity, -Infinity].
Throws an Error when a required element is missing or when the navigation
entries have no positive height (the scroll step would be 0 and the scan would
never finish). */
async function getPageRange() {
  const OUTER_SCROLLER_CLASS_NAME = "scroller wide-scroll";
  const SCROLL_VIEW_CLASS_NAME =
    "vue-recycle-scroller wide-scroll ready direction-vertical";
  const PAGE_BUTTONS_CLASS_NAME = "item page-nav-item";
  const NUM_ELEMS_TO_SCROLL = 40;
  const TIME_BETWEEN_SCROLL_MS = 5;
  const START_PAGE = 1;

  /* First element matching all given classes, or a descriptive error. */
  const firstByClassName = (className) => {
    const elm = document.getElementsByClassName(className)[0];
    if (!elm) {
      throw new Error("Unable to find element with class: " + className);
    }
    return elm;
  };
  const scrollContainersToTop = () => {
    firstByClassName(OUTER_SCROLLER_CLASS_NAME).scrollTo(0, 0);
    firstByClassName(SCROLL_VIEW_CLASS_NAME).scrollTo(0, 0);
  };

  scrollContainersToTop();
  const scrollViewElm = firstByClassName(SCROLL_VIEW_CLASS_NAME);
  const buttonHeightPx = Number(
    firstByClassName(PAGE_BUTTONS_CLASS_NAME).offsetHeight
  );
  const scrollByPx = NUM_ELEMS_TO_SCROLL * buttonHeightPx;
  if (!(scrollByPx > 0)) {
    throw new Error(
      "Unable to scan pages: page button height is " + buttonHeightPx + "."
    );
  }
  const scrollMax = scrollViewElm.scrollHeight;
  const pages = new Set();
  /* one extra step past scrollMax so the tail of the list is always visited */
  for (
    let scrollToPx = (START_PAGE - 1) * buttonHeightPx;
    scrollToPx <= scrollMax + scrollByPx;
    scrollToPx += scrollByPx
  ) {
    await waitMs(TIME_BETWEEN_SCROLL_MS);
    scrollViewElm.scrollTo(0, scrollToPx);
    /* yield through a timeout to allow drawing to run before reading the DOM */
    await waitMs(0);
    const elms = document.querySelectorAll(
      "[" + PAGE_NUMBER_ATTRIBUTE_NAME + "]"
    );
    for (const elm of elms) {
      pages.add(Number(elm.getAttribute(PAGE_NUMBER_ATTRIBUTE_NAME)));
    }
  }
  const result = [Math.min(...pages), Math.max(...pages)];
  console.log("Pages range from " + result[0] + " to " + result[1] + ".");
  scrollContainersToTop();
  await waitMs(10);
  return result;
}
/* Polls for the rendered canvas of one page until it holds non-blank content.
The page element is looked up by its CANVAS_PAGE_NUMBER_ATTRIBUTE_NAME
attribute and the canvas is searched inside that element's parent.
Resolves with:
  { dataURL } when the canvas differs from a blank canvas of the same size,
  { limited: true } when the page element is absent and a limited-container
    element is present (the page can't be loaded),
  {} when the attempt counter reaches MAX_ATTEMPTS (an error is logged).
Anything thrown by the DOM calls, for example by toDataURL, rejects the
returned promise. */
async function capturePageImage(pageNumber) {
  const blank = document.createElement("canvas");
  for (let attempts = 1; attempts < MAX_ATTEMPTS; attempts++) {
    await waitMs(TIME_BEFORE_PAGE_DOWNLOADS_MS);
    const page = document.querySelector(
      "[" + CANVAS_PAGE_NUMBER_ATTRIBUTE_NAME + '="' + pageNumber + '"]'
    );
    if (!page) {
      if (document.getElementsByClassName("limited-container").length > 0) {
        return { limited: true };
      }
      /* else, element not yet loaded... */
      continue;
    }
    const canvas = page.parentElement.querySelector("canvas");
    if (!canvas) {
      /* element not yet loaded... */
      continue;
    }
    const imageAsDataURL = canvas.toDataURL(IMAGE_TYPE);
    blank.width = canvas.width;
    blank.height = canvas.height;
    /* the reference has to use the same image type, otherwise the two strings can never be equal */
    if (imageAsDataURL !== blank.toDataURL(IMAGE_TYPE)) {
      return { dataURL: imageAsDataURL };
    }
  }
  console.error("Unable to download page: " + pageNumber);
  return {};
}
/* Captures the pages of pageRange as data URLs.
pageRange is [start, end]; a missing or falsy entry falls back to the
corresponding bound reported by getPageRange().
For each page the element whose PAGE_NUMBER_ATTRIBUTE_NAME attribute equals
the page number is clicked and the page image is then polled for. A page that
times out is skipped; a page that can't be loaded ends the run early.
Returns [images, firstPage, lastPage] where images is a list of
[pageNumber, dataURL] pairs and firstPage/lastPage are the first and last
captured page numbers.
Throws an Error when a page element cannot be found or when no page at all
could be captured. */
async function getPages(pageRange = []) {
  const [minPage, maxPage] = await getPageRange();
  const startpage = pageRange[0] || minPage;
  const endpage = pageRange[1] || maxPage;
  const imagesAsDataURL = [];
  for (let i = startpage; i <= endpage; i++) {
    const pageBtn = document.querySelector(
      "[" + PAGE_NUMBER_ATTRIBUTE_NAME + '="' + i + '"]'
    );
    if (!pageBtn) {
      throw new Error("Unable to find page: " + i);
    }
    console.time("page-" + i);
    console.log("Downloading page: " + i);
    pageBtn.click();
    const capture = await capturePageImage(i);
    console.timeEnd("page-" + i);
    if (capture.limited) {
      break;
    }
    if (capture.dataURL) {
      imagesAsDataURL.push([i, capture.dataURL]);
    }
  }
  if (imagesAsDataURL.length === 0) {
    throw new Error("Unable to download any page.");
  }
  return [
    imagesAsDataURL,
    imagesAsDataURL[0][0],
    imagesAsDataURL[imagesAsDataURL.length - 1][0],
  ];
}
/* Replaces the current document with the captured page images so that it can
be printed.
pageRange is passed straight to getPages(). Once the images are available the
body content, all body attributes and the head content are cleared, the
document title is set to "Pages <first> to <last>", a print-sizing style sheet
is added and every image is appended inside its own div.
Rejects with whatever getPages() rejects with; in that case the document is
left untouched. */
async function printPages(pageRange) {
  const [imagesAsDataURL, startpage, endpage] = await getPages(pageRange);
  document.body.innerHTML = "";
  document.head.innerHTML = "";
  /* attributes is a read-only NamedNodeMap, so every attribute (style included) is removed by name; it is copied first because the map is live */
  for (const attr of Array.from(document.body.attributes)) {
    document.body.removeAttribute(attr.name);
  }
  /* document.title sets the title of the document; head.title would only set an attribute on the head element */
  document.title = "Pages " + startpage + " to " + endpage;
  const styleElm = document.createElement("style");
  styleElm.textContent =
    "html { height: 100%; } body { min-height: 100%; } img { max-height: 9.5in; max-width:7in; display: block; margin: 0 auto; object-fit: contain;}";
  document.head.append(styleElm);
  for (const [, imageAsDataURL] of imagesAsDataURL) {
    const divElm = document.createElement("div");
    const image = document.createElement("img");
    image.src = imageAsDataURL;
    divElm.append(image);
    document.body.append(divElm);
  }
  console.log("Ready to print...");
}
/* Entry point. void makes the statement evaluate to undefined so the javascript: URL hands no value back to the browser; a failure is logged with context instead of being left as an unhandled rejection. */
void printPages(PAGE_RANGE).catch(function (err) {
  console.error("Unable to prepare pages for printing:", err);
});
javascript:/* Minified build of the bookmarklet: PAGE_RANGE and the global constants, then waitMs, getPageRange, getPages and printPages with shortened local names, ending with the printPages(PAGE_RANGE) call. Edit PAGE_RANGE at the start to select pages. */var PAGE_RANGE=[];var TIME_BEFORE_PAGE_DOWNLOADS_MS=25,PAGE_NUMBER_ATTRIBUTE_NAME="position",CANVAS_PAGE_NUMBER_ATTRIBUTE_NAME="page",IMAGE_TYPE="image/png",MAX_ATTEMPTS=400;async function waitMs(e){return await new Promise(a=>{setTimeout(a,e)})}async function getPageRange(){let e="item page-nav-item",a="position";document.getElementsByClassName("scroller wide-scroll")[0].scrollTo(0,0),document.getElementsByClassName("vue-recycle-scroller wide-scroll ready direction-vertical")[0].scrollTo(0,0);for(var r=new Set,l=Number(document.getElementsByClassName(e)[0].offsetHeight),n=40*l,i=document.getElementsByClassName("vue-recycle-scroller wide-scroll ready direction-vertical")[0],o=o=i.scrollHeight,l=Number(document.getElementsByClassName(e)[0].offsetHeight),s=0*l;s<=o+n;s+=n)await new Promise(e=>{t=setTimeout(function(a,r){a.scrollTo(0,r),e()},5,i,s)}),await new Promise(e=>{setTimeout(function(){for(var l=document.querySelectorAll("["+a+"]"),n=0;n<l.length;n++)r.add(Number(l[n].getAttribute(a)));e()})});var c=[Math.min(...r),Math.max(...r)];return console.log("Pages range from "+c[0]+" to "+c[1]+"."),document.getElementsByClassName("scroller wide-scroll")[0].scrollTo(0,0),document.getElementsByClassName("vue-recycle-scroller wide-scroll ready direction-vertical")[0].scrollTo(0,0),await waitMs(10),c}async function getPages(e){for(var[a,r]=await getPageRange(),l=e[0]||a,n=e[1]||r,i=[],o=l;o<=n;o++){var s=document.querySelector("["+PAGE_NUMBER_ATTRIBUTE_NAME+'="'+o+'"]');if(s)console.time("page-"+o),await new Promise(async e=>{console.log("Downloading page: "+o);var a=0;let r=document.createElement("canvas");for(s.click();;){if(++a>=MAX_ATTEMPTS){console.error("Unable to download page: "+o);break}await waitMs(TIME_BEFORE_PAGE_DOWNLOADS_MS);var l=document.querySelector("["+CANVAS_PAGE_NUMBER_ATTRIBUTE_NAME+'="'+o+'"]');if(!l){if(document.getElementsByClassName("limited-container").length>0){o=n;break}continue}var 
c=l.parentElement.querySelector("canvas");if(!c)continue;var g=c.toDataURL(IMAGE_TYPE);r.width=c.width,r.height=c.height;let E=r.toDataURL();if(g!==E){i.push([o,g]);break}}r.remove(),e()}),console.timeEnd("page-"+o);else throw Error("Unable to find page: "+o)}return[i,i[0][0],i[i.length-1][0]]}async function printPages(e){var[a,r,l]=await getPages(e);document.body.innerHTML="",document.body.style="",document.head.innerHTML="",document.head.title="Pages "+r+" to "+l,document.body.attributes="";var n=document.createElement("style");n.innerHTML="html { height: 100%; } body { min-height: 100%; } img { max-height: 9.5in; max-width:7in; display: block; margin: 0 auto; object-fit: contain;}",document.head.append(n);var i=document.createElement("div");document.body.append(i);for(var o=0;o<a.length;o++){var i=document.createElement("div");document.body.append(i);var s=document.createElement("img");s.src=a[o][1],i.append(s)}console.log("Ready to print...")}printPages(PAGE_RANGE);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment