Skip to content

Instantly share code, notes, and snippets.

@rpgraham84
Created March 9, 2017 20:00
Show Gist options
  • Save rpgraham84/9926aa5fe7bbc5260884d2e4807b4b1e to your computer and use it in GitHub Desktop.
Save rpgraham84/9926aa5fe7bbc5260884d2e4807b4b1e to your computer and use it in GitHub Desktop.
Facebook friends scraper
// Works as of 03/08/2017 - selectors may change on Facebook's end
// Useful for grabbing a list of friends and associated lists of their friends from a single fb user's "friends" page.
// I would have used FB's Graph API, but it doesn't provide any means of getting the same data.
// Must have a plugin in the browser to ignore X-Frame-Options header in order for pages to work within iframes.
// I'm using (https://chrome.google.com/webstore/detail/ignore-x-frame-headers/gleekbfjekiniecknbkamfmkohkpodhe) for Chrome.
// Saves output to json files which can later be assembled into a big json file or entered directly into a database.
// To run the script, get the aforementioned plugin, go to a fb user's page, click friends, then paste this in dev tools.
// There's a runaway memory problem somewhere, but rather than diagnose and fix it, I just restart the script when it crashes.
// It will crash periodically. Just delete the friends that have been completed and start again.
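// When the JSON string of friends is first logged to the console (by the `console.log(JSON.stringify(...))` call in the
// `window.onscroll` handler; "line 113" in the original gist's numbering), copy it and paste it into Sublime Text.
// If the script crashes again, note the last friend that completed and delete the friends that are already done from that JSON. Load the page again
// and enter `friends = {...}` (the trimmed JSON from Sublime Text) in the console. Then comment out the lines of the `window.onscroll` handler
// that rebuild `friends` from the page and log it ("lines 108-113" in the original gist's numbering) and paste this script in again.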
// Eventually your downloads folder will fill up with a list of json files that you can later concatenate.
/**
 * Installs `console.save(data, filename)`, which downloads `data` as a file.
 *
 * @param {*} t - Data to save. Objects are serialised with JSON.stringify; any
 *   falsy value is rejected with a console error and nothing is downloaded.
 * @param {string} [n="console.json"] - File name offered to the browser.
 * @returns {undefined}
 */
(function(e) {
    // How long the blob URL is kept alive after the click before it is released.
    var REVOKE_DELAY_MS = 10000;

    e.save = function(t, n) {
        if (!t) {
            e.error("Console.save: No data");
            return;
        }
        if (!n) n = "console.json";
        if (typeof t === "object") { t = JSON.stringify(t); }

        var blob = new Blob([t], { type: "text/json" });
        var link = document.createElement("a");
        var href = window.URL.createObjectURL(blob);

        link.download = n;
        link.href = href;
        link.dataset.downloadurl = ["text/json", link.download, link.href].join(":");

        // click() replaces the deprecated createEvent/initMouseEvent pair.
        link.click();

        // Every object URL pins its Blob in memory until it is revoked. This
        // function is called once per scraped friend, so never revoking leaks
        // one blob per call. Revocation is deferred so the download can start.
        window.setTimeout(function() {
            window.URL.revokeObjectURL(href);
        }, REVOKE_DELAY_MS);
    };
})(console);
/**
 * Appends an empty marker <div> to the body of the current global `iframe`.
 *
 * The marker carries the class combination (.mbm._5vf.sectionHeader._4khu)
 * that the scroll handlers in this script test for before they stop scrolling
 * and save. Injecting it forces that branch when the page never renders such
 * an element itself.
 */
window.keepGoing = function() {
    const marker = iframe.contentDocument.createElement('div');
    marker.classList.add("mbm", "_5vf", "sectionHeader", "_4khu");
    iframe.contentWindow.document.body.appendChild(marker);
};
/**
 * Generator over a friends map, yielding one `[key, value]` pair per own
 * enumerable key of `f`.
 *
 * The key list is captured when iteration starts; each value is read from `f`
 * when its pair is produced.
 *
 * @param {Object} f - Map to iterate (the callers in this script pass
 *   profile URL -> display name).
 * @yields {Array} `[key, f[key]]`
 */
window.friendGen = function*(f) {
    // Block-scoped bindings: the loop state used to be assigned without a
    // declaration, leaking `keys`/`key` onto the global object (and throwing
    // a ReferenceError in strict mode).
    for (const key of Object.keys(f)) {
        yield [key, f[key]];
    }
};
/**
 * Loads one friend's "friends" tab in an iframe, keeps scrolling it to the
 * bottom, and once the sentinel element shows up saves every harvested link as
 * `<name>.json` via `console.save`. It then removes the iframe and continues
 * with the next item of the global `fgen` generator.
 *
 * @param {Array|undefined} fItem - `[profileUrl, displayName]` pair as yielded
 *   by `friendGen`. `undefined` (generator exhausted) ends the crawl.
 * @returns {undefined}
 */
window.getFriendsOfFriend = function(fItem) {
    // `fgen.next().value` is undefined once every friend has been handled;
    // indexing into it used to end the crawl with a TypeError.
    if (!fItem) {
        console.log("No more friends to process");
        return;
    }

    const LINK_SELECTOR = ".fsl.fwb.fcb a";
    const SENTINEL_SELECTOR = ".mbm._5vf.sectionHeader._4khu";
    const url = fItem[0];
    const name = fItem[1];
    console.log("Processing " + name);

    // The callbacks below close over `frame` rather than the mutable global
    // `iframe`, so a late callback can never act on the next friend's frame.
    const frame = document.createElement('iframe');
    frame.style.width = "100%";
    frame.style.height = "1080px";
    // NOTE(review): "&" assumes the harvested profile URL already has a query string.
    frame.src = url + "&sk=friends";
    frame.id = "friends-iframe";
    // keepGoing() reaches the active frame through this global.
    window.iframe = frame;
    document.body.appendChild(frame);
    window.scrollTo(0, document.body.scrollHeight);

    frame.onload = function() {
        let pollId = null;
        let finished = false;
        console.log("iframe loaded");
        window.linkCount = 0;

        const scrollFrameToBottom = function() {
            frame.contentWindow.scrollTo(0, frame.contentDocument.body.scrollHeight);
        };

        frame.contentWindow.onscroll = function() {
            if (finished) return;

            // Start the poller once per frame. Registering a fresh interval on
            // every scroll event (as before) stacked up timers that were never
            // cleared.
            if (pollId === null) {
                pollId = frame.contentWindow.setInterval(function() {
                    scrollFrameToBottom();
                    frame.contentWindow.setTimeout(function() {
                        if (finished) return;
                        const stalled =
                            frame.contentDocument.querySelectorAll(LINK_SELECTOR).length === window.linkCount;
                        // Short list that stopped growing: inject the sentinel
                        // ourselves so the save branch below can run.
                        if (stalled && window.linkCount <= 20) {
                            console.log("Only " + window.linkCount + " friends found for " + name);
                            window.keepGoing();
                        }
                    }, 1000);
                }, 800);
            }

            if (frame.contentDocument.querySelectorAll(SENTINEL_SELECTOR).length === 0) {
                console.log("iframe scrolling");
                window.linkCount = frame.contentDocument.querySelectorAll(LINK_SELECTOR).length;
                return;
            }

            // Sentinel present: harvest, save, tear down, move on.
            finished = true;
            frame.contentWindow.clearInterval(pollId);

            const links = frame.contentDocument.querySelectorAll(LINK_SELECTOR);
            const harvested = {};
            for (let i = 0; i < links.length; i++) {
                harvested[links[i].href] = links[i].innerText;
            }
            frame.contentWindow.friends = harvested;

            console.log("Saving " + name + "'s friends");
            const payload = {};
            payload[url] = {};
            payload[url][name] = harvested;
            console.save(payload, name + ".json");

            document.body.removeChild(frame);
            window.getFriendsOfFriend(fgen.next().value);
        };

        // First nudge: produces the scroll event that starts the poller above.
        frame.contentWindow.setTimeout(scrollFrameToBottom, 1000);
    };
};
/**
 * Scroll handler for the page the script is pasted into.
 *
 * While no `.mbm._5vf.sectionHeader._4khu` element exists it keeps scheduling
 * a scroll to the bottom of the page. Once one exists it collects every
 * `.fsl.fwb.fcb a` link into `window.friends` (href -> text) and logs the map
 * as JSON. It then creates `window.fgen`, disarms itself and starts the crawl
 * with the first friend.
 */
window.onscroll = function() {
    const sentinelPresent = document.querySelectorAll(".mbm._5vf.sectionHeader._4khu").length !== 0;
    if (!sentinelPresent) {
        console.log("scrolling");
        setTimeout(function() { window.scrollTo(0, document.body.scrollHeight); }, 800);
        return;
    }

    const collected = {};
    const links = document.querySelectorAll(".fsl.fwb.fcb a");
    for (let i = 0; i < links.length; i++) {
        collected[links[i].href] = links[i].innerText;
    }
    window.friends = collected;
    console.log(JSON.stringify(collected));

    window.fgen = friendGen(collected);
    // Disarm before crawling so later scroll events do not restart the crawl.
    window.onscroll = function() {};

    // With an empty map the generator finishes immediately; handing its
    // undefined value to getFriendsOfFriend used to crash there.
    const first = window.fgen.next();
    if (first.done) {
        console.log("No friends found - nothing to crawl");
        return;
    }
    getFriendsOfFriend(first.value);
};
// Kick-off: jump to the bottom of the page; this is intended to fire the scroll event handled by window.onscroll.
window.scrollTo(0, document.body.scrollHeight);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment