Skip to content

Instantly share code, notes, and snippets.

@ni554n
Last active September 26, 2019 02:53
Show Gist options
  • Save ni554n/d2a3081a8017b1b94327edce5ad1e6fb to your computer and use it in GitHub Desktop.
Save ni554n/d2a3081a8017b1b94327edce5ad1e6fb to your computer and use it in GitHub Desktop.
Get status report for Facebook user's own liked pages based on page's latest post time.
/**
* Script for generating a JSON status report of the Facebook pages that the user has liked. The report groups the
* pages as active, inactive, or unavailable, based on the time of each page's latest post.
*
* Unfortunately, Facebook doesn't support unliking a page via the API, so the reported pages should be used with a userscript.
*/
const got = require("got");
const cheerio = require("cheerio");
const fs = require("fs");
/**
* Temporary Facebook Graph API access token acquired from
* [Graph API Explorer]{@link https://developers.facebook.com/tools/explorer}. "user_likes" permission is needed.
*
* @const {string}
*/
const accessToken = "Paste access token here";
/**
 * Main function for this script.
 *
 * Loads the {id: name} map of liked pages (from the ./pages.json cache when requested and readable, otherwise
 * from the Graph API), then builds the status report and saves it as report.json.
 *
 * @async
 * @param {boolean} [readFromCachedPages = false] - If true; pages.json will be read from filesystem if there is any.
 */
(async function (readFromCachedPages = false) {
    let pageInfos;

    if (readFromCachedPages) {
        try {
            pageInfos = require("./pages.json");
        } catch (error) {
            // A missing or unreadable cache is not fatal: fall through and fetch a fresh list instead.
            console.log(`Could not read ./pages.json (${error.message}). Fetching liked pages instead.`);
        }
    }

    if (!pageInfos) {
        pageInfos = await getLikedPageInfos();
        // Only a freshly fetched list needs to be cached; a list read from the cache is already on disk.
        writeToFile(pageInfos, "pages.json");
    }

    const report = await reportPageStatus(pageInfos);
    writeToFile(report, "report.json");
})().catch(error => {
    // Surface unexpected failures instead of leaving an unhandled promise rejection.
    console.log(error);
    process.exitCode = 1;
});
/**
 * Uses Facebook Graph API to get the id and name of every page liked by /me.
 *
 * Requests are made one after another, following the "paging.next" URL of each response until there is none.
 * When a request fails, the error is logged and whatever has been collected so far is returned.
 *
 * @async
 * @param {object} [pageInfos={}] - An object populated with {id: name} of all the liked pages.
 * @returns {Promise<object>} - Promise object represents pageInfos object.
 */
async function getLikedPageInfos(pageInfos = {}) {
    let url = `https://graph.facebook.com/me?fields=likes.summary(true)&access_token=${accessToken}&pretty=0&limit=100`;
    let totalPageCount;

    while (url) {
        const [error, response] = await forThis(got(url, {json: true}));

        if (error) {
            console.log(error);
            break;
        }

        const body = response.body || {};
        // The initial field-expansion request nests the edge under "likes", while the "next" URLs are expected
        // to return it at the top level, so both shapes are accepted.
        // NOTE(review): response shapes are taken from the Graph API docs - confirm against a live response.
        const likes = body.likes || body;

        for (const {name, id} of likes.data || []) pageInfos[id] = name;

        if (totalPageCount === undefined && likes.summary) totalPageCount = likes.summary.total_count;

        // A missing "paging" object or "next" link ends the loop.
        url = likes.paging && likes.paging.next;
    }

    console.log(`Total (${Object.keys(pageInfos).length} / ${totalPageCount}) liked page info. acquired.`);

    return pageInfos;
}
/**
 * Checks every liked page, one at a time, and sorts it into an "Unavailable", "Inactive" or "Active" group.
 *
 * @async
 * @param {object} pageInfos - {id: name} object of the pages to check.
 * @param {number} [date] - Cut-off time in milliseconds since the Unix epoch, forwarded to getPageState for every
 * page. Defaults to 00:00 UTC on January 1st of the current year.
 * @returns {Promise<Object>} - Promise of the report object. Each key is "<count> <group name>" and each value is
 * an array of {pageName: postsUrl} entries.
 */
async function reportPageStatus(pageInfos, date = Date.UTC(new Date().getFullYear(), 0)) {
    const groups = {
        unavailable: {title: "Unavailable", message: "|- Unavailable!", pages: []},
        inactive: {title: "Inactive", message: "|- Inactive.", pages: []},
        active: {title: "Active", message: "|- Active.", pages: []},
    };

    for (const [pageId, pageName] of Object.entries(pageInfos)) {
        console.log(`Visiting ${pageName} ...`);

        const state = await getPageState(pageId, date);

        // null means the page could not be fetched; otherwise a truthy state marks the page as inactive.
        let group = groups.active;
        if (state === null) {
            group = groups.unavailable;
        } else if (state) {
            group = groups.inactive;
        }

        console.log(group.message);
        group.pages.push({[pageName]: `https://fb.com/pg/${pageId}/posts`});
    }

    const report = {};
    for (const {title, pages} of [groups.unavailable, groups.inactive, groups.active]) {
        report[`${pages.length} ${title}`] = pages;
    }

    return report;
}
/**
 * Downloads the posts listing of a page and decides whether the page is inactive.
 *
 * @async
 * @param {string} id - Id of the page, used to build the https://www.facebook.com/pg/{id}/posts URL.
 * @param {number} date - Cut-off time, passed unchanged to isInactive.
 * @returns {Promise<?boolean>} - null when the request failed, otherwise the result of isInactive for the
 * post times extracted from the downloaded HTML.
 */
async function getPageState(id, date) {
    const [error, response] = await forThis(got(`https://www.facebook.com/pg/${id}/posts`, {
        // NOTE(review): a desktop browser user agent is sent, presumably so the regular HTML page is served.
        headers: {"user-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1"}
    }));

    if (error) {
        console.log(`Skipped - ${id} scraping failed!`);
        return null;
    }

    const postTimes = extractPostTimes(response.body);

    return isInactive(postTimes, date);
}
/**
 * Collects the "data-utime" attribute of the first <abbr> found inside every ".userContentWrapper" element.
 *
 * @param {string} html - HTML source of a page's posts listing.
 * @returns {Array<string|undefined>} - One entry per ".userContentWrapper" element, in document order. An entry
 * is undefined when the attribute could not be found for that element.
 */
function extractPostTimes(html) {
    const $ = cheerio.load(html);
    const postElements = $(".userContentWrapper").toArray();

    // A plain Array#map is used on purpose: it keeps undefined entries, so positions still match the posts.
    return postElements.map(postElement => {
        const timeTag = $(postElement).find("abbr");

        return timeTag.attr("data-utime");
    });
}
/**
 * Decides whether a page counts as inactive, judged by the time of its latest post.
 *
 * Only the first two entries of postTimes are considered. Values are converted to numbers before they are
 * compared, because comparing two numeric strings directly would order them alphabetically.
 *
 * @param {Array<string|number|undefined>} postTimes - Post times in seconds since the Unix epoch, in page order.
 * @param {number} date - Cut-off time in milliseconds since the Unix epoch.
 * @returns {boolean} - true when the first entry is missing, or when the latest post time is at or before the
 * cut-off; false otherwise.
 */
function isInactive(postTimes, date) {
    if (!postTimes[0]) return true;

    let latestPost = Number(postTimes[0]);

    // First post can be an older featured post. In this case, the second post is the latest post.
    if (postTimes.length > 1) {
        const secondPost = Number(postTimes[1]);

        // A missing second entry converts to NaN, which never compares greater, so it is ignored.
        if (latestPost < secondPost) latestPost = secondPost;
    }

    // date is in milliseconds while the post times are in seconds.
    return latestPost <= date / 1000;
}
/**
 * Saves data as JSON (indented by 4 spaces, UTF-8 encoded) without waiting for the write to finish.
 * The outcome is logged once the write completes: the error on failure, a "saved" message otherwise.
 *
 * @param {object} data - Value that will be serialized with JSON.stringify.
 * @param {string} fileName - Name of the file to write to.
 */
function writeToFile(data, fileName) {
    const serialized = JSON.stringify(data, null, 4);

    const reportOutcome = function (error) {
        if (error) {
            console.log(error);
            return;
        }

        console.log(`./${fileName} saved!`);
    };

    fs.writeFile(fileName, serialized, "utf8", reportOutcome);
}
/**
 * Turns the outcome of a promise into an [error, response] pair, so callers can branch on the error
 * instead of wrapping every await in try/catch.
 *
 * @param {Promise} promise - Promise (or plain value) to wait for.
 * @returns {Promise<*[]>} - Resolves to [null, response] on success and to [error, null] on rejection.
 */
function forThis(promise) {
    const asSuccess = response => [null, response];
    const asFailure = error => [error, null];

    return Promise.resolve(promise).then(asSuccess, asFailure);
}
{
"name": "fb-pages-status",
"version": "1.0.0",
"description": "Get status report for Facebook user's own liked pages based on page's latest post time.",
"main": "fb-pages-status.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": ["facebook", "pages", "graph api", "scrape", "likes"],
"author": "Nissan Ahmed",
"license": "ISC",
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"got": "^9.6.0"
}
}
// ==UserScript==
// @name One Click Multiple Tab Open
// @description Open 20 tabs from JSON pageIDs in facebook by pressing "x".
// @namespace ni554n.github.io
// @include https://*.facebook.com/*
// @grant GM_getValue
// @grant GM_setValue
// @grant GM_openInTab
// ==/UserScript==
// Copy paste an active / inactive pageInfo array from report.json.
const pageInfos = [];
// Opens the next batch of page URLs from pageInfos in new tabs every time a plain "x" is pressed.
// The position in the list is kept in the userscript storage under "count".
document.addEventListener('keydown', (event) => {
    if (event.key !== 'x') return;

    // Leave browser shortcuts such as Ctrl+X / Cmd+X (cut) untouched.
    if (event.ctrlKey || event.metaKey || event.altKey) return;

    // Don't hijack the key while the user is typing in a form field or an editable element.
    const target = event.target;
    if (target && (target.isContentEditable || /^(INPUT|TEXTAREA|SELECT)$/.test(target.tagName))) return;

    event.preventDefault();

    const tabsPerKeyPress = 20;
    const start = GM_getValue("count", 0);
    // slice() stops at the end of the list, so the last batch may be shorter than tabsPerKeyPress.
    const batch = pageInfos.slice(start, start + tabsPerKeyPress);

    if (batch.length === 0) {
        console.log("No more pages left to open.");
        return;
    }

    for (const pageInfo of batch) {
        // Every entry is a {name: url} object; the second argument is meant to open the tab in the background.
        GM_openInTab(`${Object.values(pageInfo)[0]}`, true);
    }

    GM_setValue("count", start + batch.length);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment