Skip to content

Instantly share code, notes, and snippets.

@boogheta
Created August 11, 2014 15:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save boogheta/89d37c3c5ecf5bf9d6af to your computer and use it in GitHub Desktop.
Save boogheta/89d37c3c5ecf5bf9d6af to your computer and use it in GitHub Desktop.
Scroll and unfold a webpage within PhantomJS or artoo.js
// Handle script with no argument for DEBUG as an artoo bookmarklet
if (typeof(arguments) == "undefined") {
arguments = [60, 20, 15, function(){console.log("FINISHED!");}];
}
(function(endScript, timeout, idle_timeout, ajax_timeout) {
// Convert the three timeouts from seconds to milliseconds; the global timeout
// is never allowed to be shorter than 30 seconds.
var timeout = 1000 * Math.max(timeout, 30);
var idle_timeout = 1000 * idle_timeout;
var ajax_timeout = 1000 * ajax_timeout;
// Check every second whether the script is done or has run for too long, and
// signal the end of the async script through endScript() when it is.
// The script is considered still busy while a scroll or a click session is
// active, or while it has been idle for less than idle_timeout; it is stopped
// anyway once it has been running for timeout milliseconds.
var running_since = new Date(),
    idling_since = new Date(),
    finalize = function() {
      var now = new Date(),
          elapsed = now - running_since,
          busy = scrolling || clicking || now - idling_since < idle_timeout;
      if (busy && elapsed < timeout) {
        // Not finished yet: look again in one second
        return setTimeout(finalize, 1000);
      }
      console.log((elapsed >= timeout ? "FORCE STOPP" : "FINISH") +
        "ING script running since", Math.floor(elapsed / 1000) + "s");
      // Clear all leftover running timeouts: a throwaway timer is registered
      // only to learn the most recent timer id, then every id from 0 up to it
      // (plus a margin of 1000) is cleared, which includes the page's own
      // timers. This relies on timer ids being increasing integers.
      // A no-op function is scheduled rather than a code string so the probe
      // also works on pages whose Content-Security-Policy forbids evaluating
      // strings.
      var maxTimeOutId = setTimeout(function() {}, 0) + 1000;
      for (var i = 0; i < maxTimeOutId; i++) clearTimeout(i);
      // Run Selenium async-script signal-stopper and hand back its result
      return endScript();
    };
// Scroll the page screen by screen first,
// then go back to the top and scroll down screen by screen once again.
// Everything below is declared in a single `var` statement so that none of
// these names leaks out as an implicit global (which could overwrite a global
// of the visited page and throws in strict mode).
var pageYPos = 0,
    // Huge vertical offset used to jump to the very bottom of the page
    maxYPos = 555000000000,
    // True while a scroll session is in progress
    scrolling = false,
    // True once the second (final) pass of a scroll session has started
    roundtrip = false,
    // Start a scroll session unless one is already running.
    startScroller = function() {
      // Never run twice simultaneously
      if (scrolling) return;
      scrolling = true;
      roundtrip = false;
      console.log("STARTING scroll session");
      // Let's jump to the top, to the bottom and back to the top before
      // starting the screen by screen scroll
      window.scroll(0, 0);
      window.scroll(0, maxYPos);
      window.scroll(0, 0);
      pageYPos = 0;
      return scroller();
    },
    // Return the furthest vertical position known so far: the largest of the
    // last recorded position and the offsets currently reported by window.
    getPageYPos = function() {
      return Math.max(
        pageYPos,
        window.pageYOffset || 0,
        window.scrollY || 0
      );
    },
    // Perform one scroll step and schedule the next one while the page moves.
    scroller = function() {
      // Step size: at least 1000, or more if window reports larger dimensions
      var toscroll = Math.max(
        1000,
        window.scrollMaxY || 0,
        window.innerHeight || 0,
        window.outerHeight || 0
      );
      pageYPos = getPageYPos();
      window.scroll(0, pageYPos + toscroll);
      var newPos = getPageYPos();
      if (newPos !== pageYPos) {
        // Scroll happened, let's continue
        pageYPos = newPos;
        setTimeout(scroller, 50);
      } else if (!roundtrip) {
        // First pass is finished, let's run it all over once more from the top
        window.scroll(0, 0);
        pageYPos = 0;
        roundtrip = true;
        setTimeout(scroller, 500);
      } else {
        // Scroll is totally over: leave the page scrolled to the very bottom
        // and start idling.
        // NOTE(review): the previous comment said "back to top" but the code
        // has always scrolled to maxYPos; behaviour is kept as is.
        window.scroll(0, maxYPos);
        scrolling = false;
        idling_since = new Date();
      }
    };
// Identify and click (only once) on anchor links to trigger Ajax queries
var clicking = false,
    // Set when unfold() is requested while a session is already running.
    // It starts as true, so the first session is always followed by another.
    relaunch = true,
    // Tell whether an element is a not yet clicked link whose href ends in "#".
    // Always returns a boolean.
    isClick = function(element) {
      var href = element.href;
      // Every element of the page is tested, and some of them (e.g. SVG
      // elements, whose href is an SVGAnimatedString object) expose a href
      // that is not a string: those are skipped instead of throwing, since an
      // exception here would leave `clicking` stuck to true.
      return (typeof href === "string" &&
        href.length > 0 &&
        href.charAt(href.length - 1) === "#" &&
        !element.hasAttribute('hyphantomas_clicked'));
    },
    // Best-effort click: call the dblclick handler, fall back on the click
    // handler if that throws (including when no handler is set), then on the
    // native click() method. A failure of the last attempt is ignored.
    simulateClick = function(element) {
      try { element.ondblclick(); } catch (e0) {
        try { element.onclick(); } catch (e1) {
          try { element.click(); } catch (e2) {
            // Nothing left to try: deliberately ignored
          }
        }
      }
    },
    // Start an unfolding session: collect all clickable links of the page and
    // click them one after the other.
    unfold = function() {
      // Never run twice simultaneously, plan restart for concurrent calls
      if (clicking) {
        relaunch = true;
        return;
      }
      clicking = true;
      var allElements = document.querySelectorAll('*'),
          links = Array.prototype.slice.call(allElements).filter(isClick);
      console.log("STARTING unfolding session for total links:", links.length);
      return clickAjax(links);
    },
    // Click the last link of the list, then process the remaining ones 100ms
    // later; once the list is empty, close the session.
    clickAjax = function(links) {
      if (links.length) {
        var link = links.pop();
        // Checked again right before clicking so a link that got marked as
        // clicked since the list was built is not clicked twice
        if (isClick(link)) {
          console.log("CLICKING", link.textContent);
          link.setAttribute('hyphantomas_clicked', 'true');
          simulateClick(link);
        }
        return setTimeout(function() { clickAjax(links); }, 100);
      }
      clicking = false;
      if (relaunch) {
        // Restart if other calls were made concurrently
        relaunch = false;
        return setTimeout(unfold, 1000);
      }
      // Rerun scroller after unfold in case new scroll-triggers appeared
      return startScroller();
    };
// Override XMLHttpRequest to force timeouts and trigger scroller + unfolder.
// newXHR is a drop-in constructor: it builds a native request, instruments it
// and returns it, so `new XMLHttpRequest()` still yields a native object.
var oldXHR = window.XMLHttpRequest,
    newXHR = function() {
      var realXHR = new oldXHR();
      // Every state change of the request schedules a new scroll session and a
      // new unfolding session, since the page may have received new content
      realXHR.addEventListener("readystatechange", function() {
        console.log("ajax intercepted");
        setTimeout(startScroller, 500);
        setTimeout(unfold, 750);
      }, false);
      // Force a timeout on the request and log when it is reached
      realXHR.timeout = ajax_timeout;
      realXHR.ontimeout = function() {
        console.log("ajax query timed out!!!");
      };
      return realXHR;
    };
// Keep the wrapper interchangeable with the native constructor for page code
// that relies on `instanceof XMLHttpRequest`, on XMLHttpRequest.prototype or
// on the static readyState constants (e.g. XMLHttpRequest.DONE), which would
// otherwise all be missing from the replacement.
if (oldXHR) {
  newXHR.prototype = oldXHR.prototype;
  var xhrStates = ["UNSENT", "OPENED", "HEADERS_RECEIVED", "LOADING", "DONE"];
  for (var s = 0; s < xhrStates.length; s++) {
    newXHR[xhrStates[s]] = oldXHR[xhrStates[s]];
  }
}
// Install the XMLHttpRequest wrapper, then schedule the first scroll session,
// the first unfolding session and the end-of-script watcher, in that order.
window.XMLHttpRequest = newXHR;
var kickoff = [
  [startScroller, 500],
  [unfold, 750],
  [finalize, 1000]
];
for (var k = 0; k < kickoff.length; k++) {
  setTimeout(kickoff[k][0], kickoff[k][1]);
}
})(arguments[arguments.length - 1], arguments[0], arguments[1], arguments[2]);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment