Created
August 11, 2014 15:02
-
-
Save boogheta/89d37c3c5ecf5bf9d6af to your computer and use it in GitHub Desktop.
Scroll and unfold a webpage within PhantomJS or artoo.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Handle script with no argument for DEBUG as an artoo bookmarklet | |
if (typeof(arguments) == "undefined") { | |
arguments = [60, 20, 15, function(){console.log("FINISHED!");}]; | |
} | |
/**
 * Scroll through the whole page screen by screen and click (once) every
 * element whose href ends with "#", so that lazily loaded / Ajax content gets
 * fetched. `endScript` is called once nothing has been scrolling or clicking
 * for `idle_timeout` seconds, or as soon as `timeout` seconds have elapsed.
 *
 * Kept in ES5 syntax on purpose: the script is meant to run inside PhantomJS.
 *
 * @param {Function} endScript    Callback invoked once when the script ends.
 * @param {number}   timeout      Maximum total running time in seconds
 *                                (values below 30 are raised to 30).
 * @param {number}   idle_timeout Seconds of inactivity required before ending.
 * @param {number}   ajax_timeout Timeout in seconds forced on every
 *                                XMLHttpRequest created by the page.
 */
(function(endScript, timeout, idle_timeout, ajax_timeout) {

  // Convert a value to a number of seconds, using `fallbackSeconds` when the
  // value is missing or not numeric (otherwise NaN would make every timeout
  // comparison below false and the script would stop on its first check).
  function toSeconds(value, fallbackSeconds) {
    var seconds = Number(value);
    return isFinite(seconds) ? seconds : fallbackSeconds;
  }

  // All durations are handled in milliseconds from here on.
  var maxRuntimeMs = Math.max(30, toSeconds(timeout, 60)) * 1000,
      idleTimeoutMs = toSeconds(idle_timeout, 20) * 1000,
      ajaxTimeoutMs = toSeconds(ajax_timeout, 15) * 1000;

  // Shared state. Everything is declared locally so that nothing leaks onto
  // `window`, where it could clobber variables of the visited page.
  var runningSince = new Date(),
      idlingSince = new Date(),
      scrolling = false,   // a scroll session is in progress
      roundtrip = false,   // the second top-to-bottom pass has started
      clicking = false,    // an unfolding (clicking) session is in progress
      relaunch = true,     // another unfolding session must follow this one
      pageYPos = 0,
      maxYPos = 555000000000; // far beyond any real page height

  // Control each second whether the script finished running or ran for too
  // long, and trigger the end of the async selenium script if so.
  function finalize() {
    var now = new Date(),
        elapsedMs = now - runningSince,
        timedOut = elapsedMs >= maxRuntimeMs,
        busy = scrolling || clicking || now - idlingSince < idleTimeoutMs;
    if (busy && !timedOut) {
      return setTimeout(finalize, 1000);
    }
    console.log((timedOut ? "FORCE STOPP" : "FINISH") +
      "ING script running since", Math.floor(elapsedMs / 1000) + "s");
    // Clear all leftover running timeouts: grab a fresh timer id (with a
    // no-op function rather than a string, which would be eval'ed and may be
    // blocked by a Content-Security-Policy) and clear every id up to it,
    // plus a safety margin.
    var maxTimeOutId = setTimeout(function() {}, 0) + 1000;
    for (var i = 0; i < maxTimeOutId; i++) {
      clearTimeout(i);
    }
    // Run Selenium async-script signal-stopper
    return endScript();
  }

  // Scroll the page screen by screen first,
  // then back all the way up and down screen by screen once again.
  function startScroller() {
    // Never run twice simultaneously
    if (scrolling) {
      return;
    }
    scrolling = true;
    roundtrip = false;
    console.log("STARTING scroll session");
    // Scroll all the way up, down and back up before everything
    window.scroll(0, 0);
    window.scroll(0, maxYPos);
    window.scroll(0, 0);
    pageYPos = 0;
    return scroller();
  }

  // Furthest vertical position known so far: the last recorded one or
  // whatever the browser currently reports.
  function getPageYPos() {
    return Math.max(
      pageYPos,
      window.pageYOffset || 0,
      window.scrollY || 0
    );
  }

  // Scroll one step further down and reschedule itself until the position
  // stops changing; do a second full pass, then stop and start idling.
  function scroller() {
    var toscroll = Math.max(
      1000,
      window.scrollMaxY || 0,
      window.innerHeight || 0,
      window.outerHeight || 0
    );
    pageYPos = getPageYPos();
    window.scroll(0, pageYPos + toscroll);
    var newPos = getPageYPos();
    if (newPos !== pageYPos) {
      // Scroll happened, let's continue
      pageYPos = newPos;
      setTimeout(scroller, 50);
    } else if (!roundtrip) {
      // Scroll is finished, let's run it all over once more from the top
      window.scroll(0, 0);
      pageYPos = 0;
      roundtrip = true;
      setTimeout(scroller, 500);
    } else {
      // Scroll is totally over: leave the page scrolled to the bottom
      // and start idling
      window.scroll(0, maxYPos);
      scrolling = false;
      idlingSince = new Date();
    }
  }

  // Identify not already clicked elements with a href ending in "#".
  // The href must be a string: SVG elements expose `href` as an
  // SVGAnimatedString object, on which string methods would throw and abort
  // the whole unfolding session with `clicking` stuck to true.
  function isClick(element) {
    var href = element.href;
    return (typeof href === "string" &&
      href.length > 0 &&
      href.charAt(href.length - 1) === "#" &&
      !element.hasAttribute('hyphantomas_clicked'));
  }

  // Try clicking all ways: each fallback is only attempted when the previous
  // attempt threw (which includes the handler property not being set).
  // Failures are deliberately ignored, clicking is best effort.
  function simulateClick(element) {
    try { element.ondblclick(); } catch (e0) {
      try { element.onclick(); } catch (e1) {
        try { element.click(); } catch (e2) {
        }
      }
    }
  }

  // Identify and click (only once) on anchor links to trigger Ajax queries.
  function unfold() {
    // Never run twice simultaneously, plan restart for concurrent calls
    if (clicking) {
      relaunch = true;
      return;
    }
    clicking = true;
    var allElements = document.querySelectorAll('*'),
        links = Array.prototype.slice.call(allElements).filter(isClick);
    console.log("STARTING unfolding session for total links:", links.length);
    return clickAjax(links);
  }

  // Click successively on all identified links, one every 100ms, then either
  // restart an unfolding session or hand over to the scroller.
  function clickAjax(links) {
    if (links.length) {
      var link = links.pop();
      // Check again: the link may have been clicked by another session
      if (isClick(link)) {
        console.log("CLICKING", link.textContent);
        link.setAttribute('hyphantomas_clicked', 'true');
        simulateClick(link);
      }
      return setTimeout(function() { clickAjax(links); }, 100);
    }
    clicking = false;
    if (relaunch) {
      // Restart if other calls were made concurrently
      relaunch = false;
      return setTimeout(unfold, 1000);
    }
    // Rerun scroller after unfold in case new scroll-triggers appeared
    return startScroller();
  }

  // Override XMLHttpRequest to force timeouts and trigger scroller + unfolder
  // whenever the page performs Ajax queries.
  var oldXHR = window.XMLHttpRequest,
      newXHR = function() {
        var realXHR = new oldXHR();
        realXHR.addEventListener("readystatechange", function() {
          console.log("ajax intercepted");
          setTimeout(startScroller, 500);
          setTimeout(unfold, 750);
        }, false);
        // NOTE(review): per the XMLHttpRequest spec a non-zero timeout makes
        // synchronous open() calls throw in a window context - confirm that
        // the targeted pages do not rely on synchronous requests.
        realXHR.timeout = ajaxTimeoutMs;
        realXHR.ontimeout = function() {
          console.log("ajax query timed out!!!");
        };
        return realXHR;
      };
  // Keep the replacement constructor compatible with page code relying on
  // XMLHttpRequest.prototype or on the readyState constants (e.g. DONE).
  newXHR.prototype = oldXHR.prototype;
  ["UNSENT", "OPENED", "HEADERS_RECEIVED", "LOADING", "DONE"].forEach(function(name) {
    if (name in oldXHR) {
      newXHR[name] = oldXHR[name];
    }
  });
  window.XMLHttpRequest = newXHR;

  // Kick everything off
  setTimeout(startScroller, 500);
  setTimeout(unfold, 750);
  setTimeout(finalize, 1000);
})(arguments[arguments.length - 1], arguments[0], arguments[1], arguments[2]);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment