Skip to content

Instantly share code, notes, and snippets.

@baptx
Last active May 13, 2021 15:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save baptx/5594449f35a776d3ed07d81a6cb588ba to your computer and use it in GitHub Desktop.
Save baptx/5594449f35a776d3ed07d81a6cb588ba to your computer and use it in GitHub Desktop.
Display and load all Facebook posts / comments
/* This script will allow you to use the Ctrl+F search function of your web browser to search text in all posts / comments
* (without sending your search requests to Facebook and the HTML page can be saved with Ctrl+S as a backup for offline use).
* It is also useful if you want to save the content of a Facebook group that requires you to renew a paid membership to remain a member
* (video / audio / image and other files should be downloaded separately) */
// Labels of the links / buttons the script clicks on (case-sensitive).
// Replace with your language translation if needed.
// All values except seeMoreText are regular expression sources: they are joined with "|" and matched
// against the text of each candidate element in loadCommentsReplies().
// The ^ / $ anchors matter: if the text is only "comment", the script will click on unintended links
// containing the word "comment" in the page title.
var moreCommentText = "^View 1 more comment$";
var moreCommentsText = "^View [0-9]+ more comments$";
var previousCommentsText = "^View previous comments$";
var moreReplyText = "^View 1 more reply$";
var moreRepliesText = "^View [0-9]+ more replies$";
var oneReplyText = "^View 1 reply$";
var allRepliesText = "^View all [0-9]+ replies$";
var replyText = "^1 Reply$";
var repliesText = "^[0-9]+ Replies$"; // avoid clicking on "Hide X Replies"
var seeMoreText = "See More"; // no need for ^ / $ delimiters: loadSeeMore() compares this as a plain string, not as a regex
/* Scroll to the bottom of the page or to the limit you want to display before executing the function displayLoadAll().
* You may need to run the function 2 times or more to load everything like nested replies.
* You may also need to scroll back slowly to the other side of the page to refresh the DOM with pending HTTP requests before running the function again or saving page.
* The function displayContent() can be executed if necessary to display hidden content before saving the page, which should be done at the bottom of the page to avoid display issues */
/**
 * Clicks the "more comments / replies" links first, then the "See More" links.
 *
 * @returns {Promise} promise that settles once loadCommentsReplies() and then loadSeeMore()
 * have finished, so callers can chain another run or handle a rejection.
 */
function displayLoadAll()
{
    // promise chain fixed based on https://stackoverflow.com/questions/66164973/promise-chain-does-not-wait-until-other-promise-is-resolved/66165074#66165074
    // displayContent() is intentionally not part of the chain; run it separately when hidden content must be shown.
    // The chain is returned (instead of being left floating) so the caller can wait for it.
    return loadCommentsReplies()
        .then(loadSeeMore);
}
/**
 * Makes hidden feed entries visible again (useful before saving the page).
 *
 * Looks at the children of the first [role=feed] element, ignoring the first and the last 2 of them,
 * and for each one inspects the node found 6 firstChild levels below it. When that node is hidden,
 * its "hidden" attribute is removed and its display style is set to "block".
 * Entries that do not have the expected nesting are skipped, and a missing feed only logs a warning.
 *
 * @returns {Promise} promise resolved once every entry has been processed.
 */
function displayContent()
{
    return new Promise(function(resolve) {
        const feed = document.querySelectorAll("[role=feed]")[0];
        if (!feed) {
            // nothing to unhide on a page without a feed; resolve so a promise chain can continue
            console.warn("displayContent: no [role=feed] element found");
            resolve();
            return;
        }
        const list = feed.childNodes;
        const length = list.length;
        const nestingDepth = 6; // the content wrapper sits 6 firstChild levels below each feed entry
        for (let i = 1; i < length - 2; ++i) { // ignore first and last 2 elements
            let target = list[i];
            for (let depth = 0; target && depth < nestingDepth; ++depth) {
                target = target.firstChild;
            }
            // unhide content when needed only (entries with a different structure are skipped)
            if (target && target.hidden) {
                console.log("displayContent", i, length);
                target.removeAttribute("hidden");
                target.style.display = "block";
            }
        }
        resolve();
    });
}
/**
 * Clicks every link whose text matches one of the "more comments / replies" labels
 * (moreCommentText, moreCommentsText, previousCommentsText, moreReplyText, moreRepliesText,
 * oneReplyText, allRepliesText, replyText, repliesText).
 *
 * Candidates are taken from a static querySelectorAll() list; only the nodeValue of each
 * candidate's first child is tested, and candidates without a first child are skipped.
 *
 * @returns {Promise} promise resolved once every candidate has been examined.
 */
function loadCommentsReplies()
{
    return new Promise(function(resolve) {
        const list = document.querySelectorAll("span > span[dir=auto], span > span[dir=auto] > div > div:nth-child(4)"); // avoid using random class names that can change
        const length = list.length;
        // compile the alternation once instead of rebuilding it for every element
        const linkPattern = new RegExp([
            moreCommentText,
            moreCommentsText,
            previousCommentsText,
            moreReplyText,
            moreRepliesText,
            oneReplyText,
            allRepliesText,
            replyText,
            repliesText
        ].join("|"));
        // plain loop instead of one recursive call per element, which could exhaust the call stack on long pages
        for (let i = 0; i < length; ++i) {
            const firstChild = list[i].firstChild;
            const label = firstChild ? firstChild.nodeValue : null;
            // avoid clicking on unwanted buttons
            if (label && linkPattern.test(label)) {
                console.log("loadCommentsReplies", i, length);
                // timeout not needed to send all requests since we have to scroll back to send pending requests for the displayed content
                list[i].click();
            }
        }
        resolve();
    });
}
/**
 * Clicks every div[role=button] whose first child's nodeValue is exactly seeMoreText,
 * which expands truncated posts / comments.
 *
 * Buttons without a first child are skipped.
 *
 * @returns {Promise} promise resolved once every button has been examined.
 */
function loadSeeMore()
{
    return new Promise(function(resolve) {
        // use querySelectorAll instead of getElementsByClassName since it returns a static list instead of a live list (to avoid undefined error when links are clicked and removed from the DOM)
        const list = document.querySelectorAll("div[role=button]");
        const length = list.length;
        // plain loop instead of one recursive call per element, which could exhaust the call stack on long pages
        for (let i = 0; i < length; ++i) {
            const firstChild = list[i].firstChild;
            // avoid clicking on like buttons by mistake if Facebook changes DOM structure
            if (firstChild && firstChild.nodeValue === seeMoreText) {
                console.log("loadSeeMore", i, length);
                // timeout not needed unless there is a robot detection to bypass (data already present)
                list[i].click();
            }
        }
        resolve();
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment