Skip to content

Instantly share code, notes, and snippets.

Created January 23, 2024 16:43
Show Gist options
  • Save 1337hero/284dd76d6f1e5fd7df0b3d6fc2cf5183 to your computer and use it in GitHub Desktop.
Save 1337hero/284dd76d6f1e5fd7df0b3d6fc2cf5183 to your computer and use it in GitHub Desktop.
FB Email Scraper
/**
 * Serializes `data` to CSV text, wraps it in a Blob of type
 * 'text/csv;charset=utf-8;' and prepares a browser download for it.
 *
 * Per-cell formatting visible here: `null` becomes the literal text `null`,
 * strings are wrapped in double quotes with embedded quotes doubled, and any
 * other value is returned unchanged.
 *
 * Download path: `navigator.msSaveBlob(blob, fileName)` when that API exists,
 * otherwise an anchor element pointed at an object URL whose `download`
 * attribute falls back to 'data.csv' when `fileName` is falsy.
 *
 * NOTE(review): this block is incomplete as captured. The `headers` array is
 * never closed, the per-row mapping is only partially present, nothing
 * visible triggers the anchor download, and the closing braces are absent.
 * Restore the missing lines from the original script before running.
 *
 * @param {Array<Object>} data - Rows to export; presumably one object per row
 *   keyed by header name (see `row[header]`) — confirm against the caller.
 * @param {string} [fileName] - Name for the downloaded file.
 */
function createCSV(data, fileName) {
const headers = [
const csvContent = [
headers.join(','), =>
.map((header) => {
const value = row[header]
if (value === null) return 'null'
if (typeof value === 'string') {
// Wrap all fields, including those without commas, in double quotes
return `"${value.replace(/"/g, '""')}"`
return value
const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' })
const link = document.createElement('a')
if (navigator.msSaveBlob) {
// IE 10+
navigator.msSaveBlob(blob, fileName)
} else {
const url = URL.createObjectURL(blob)
link.setAttribute('href', url)
link.setAttribute('download', fileName || 'data.csv')
/**
 * Scrolls the page down in steps so more feed content can load.
 *
 * Visible behavior: every 400 ms an interval callback calls
 * `wrapper.scrollBy(0, 2000)` (with `wrapper` being `window`) and adds the
 * step to `totalHeight`; afterwards the function waits a further 1000 ms.
 *
 * NOTE(review): this block is incomplete as captured. The condition that
 * clears the interval and resolves the outer promise is not visible, and the
 * closing braces are absent.
 * NOTE(review): `scrollHeight` is an Element property, so reading it from
 * `window` presumably yields `undefined`. `scrollHeightBefore` has no visible
 * use here — confirm against the original script.
 *
 * @returns {Promise<void>}
 */
async function scrollDown() {
// const wrapper = document.querySelector("#search-page-list-container");
const wrapper = window
await new Promise((resolve, reject) => {
var totalHeight = 0
var distance = 2000
var timer = setInterval(async () => {
var scrollHeightBefore = wrapper.scrollHeight
wrapper.scrollBy(0, distance)
totalHeight += distance
}, 400)
await new Promise((resolve) => setTimeout(resolve, 1000))
/**
 * Extracts the first email-looking substring from a piece of text.
 *
 * @param {string|null|undefined} text - Text to search.
 * @returns {string} The first match, or '' when `text` is not a string or
 *   contains no email address.
 */
function getEmailFromText(text) {
  // Anything that is not a string (null, undefined, numbers, ...) cannot be
  // searched; report "no email" instead of throwing.
  if (typeof text !== 'string') {
    return ''
  }
  // Only the first match is used, so the regex does not need the `g` flag.
  const emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/
  const [firstEmail = ''] = text.match(emailRegex) ?? []
  return firstEmail
}
/**
 * Searches the divs inside `post` for ones whose `data-visualcompletion`
 * attribute equals "ignore-dynamic", collecting them in `matchingDivs`, then
 * acts on `thingToClickToOpenComments` when one is found.
 *
 * NOTE(review): this block is incomplete as captured. The statement that
 * adds to `matchingDivs`, the expression assigned to
 * `thingToClickToOpenComments`, the body of the final `if`, and all closing
 * braces are missing. The name suggests the chosen element is clicked to
 * open the comments — confirm against the original script.
 *
 * @param {Element} post - Post container to search within.
 */
function clickOnComments(post) {
// Get all divs on the page
var allDivs = post.getElementsByTagName('div')
// Create an array to store matching divs
var matchingDivs = []
// Loop through each div
for (var i = 0; i < allDivs.length; i++) {
// Check if the div has the attribute data-visualcompletion set to "ignore-dynamic"
if (allDivs[i].getAttribute('data-visualcompletion') === 'ignore-dynamic') {
// Add the matching div to the array
const thingToClickToOpenComments =
if (thingToClickToOpenComments) {
/**
 * Recursively walks a DOM node and returns the text of every non-blank text
 * node beneath it, in document order.
 *
 * @param {Node} element - Node to walk; must expose `childNodes`, `nodeType`
 *   and `nodeValue`.
 * @returns {string[]} Collected text pieces; empty when there is no text.
 */
function traverseElements(element) {
  // Interior node: gather the text of each child and flatten into one list.
  if (element.childNodes.length > 0) {
    return Array.from(element.childNodes).flatMap((child) =>
      traverseElements(child),
    )
  }

  // Leaf node: only text nodes with non-whitespace content contribute.
  const isNonBlankText =
    element.nodeType === Node.TEXT_NODE && element.nodeValue.trim() !== ''

  // NOTE(review): the captured source lost the statement that stored the
  // text. Storing the trimmed value is assumed here because callers compare
  // entries against exact labels such as 'Reply' — confirm against the
  // original script.
  return isNonBlankText ? [element.nodeValue.trim()] : []
}
/**
 * Reduces the raw text pieces of a comment to the comment body.
 *
 * Steps:
 *  1. Drop entries that are exactly 'Reply', 'Like', 'Top Contributor',
 *     'Follow' or '·', and entries made up only of digits.
 *  2. Take the remaining entries from index 1 up to (but excluding) index
 *     `textArray.length - 3`.
 *  3. Join the result with single spaces.
 *
 * The end index in step 2 is computed from the length of the unfiltered
 * input, not the filtered list. That matches the original code and is kept
 * on purpose so results do not change.
 *
 * @param {Array<string>|null|undefined} textArray - Text pieces of a comment.
 * @returns {string|undefined} The comment text, or `undefined` when
 *   `textArray` is null/undefined.
 */
function getTextFromComment(textArray) {
  const ignoredLabels = new Set([
    'Reply',
    'Like',
    'Top Contributor',
    'Follow',
    '·',
  ])
  const isDigitsOnly = (section) =>
    typeof section === 'string' && /^\d+$/.test(section)

  return textArray
    ?.filter((section) => !ignoredLabels.has(section) && !isDigitsOnly(section))
    .slice(1, textArray.length - 3)
    .join(' ')
}
/**
 * Collects the comments found under a post, or under the open dialog when no
 * post is given.
 *
 * A comment is any descendant div whose `aria-label` starts with
 * "Comment by". Its text pieces are gathered with `traverseElements`, the
 * body is derived with `getTextFromComment`, and an email address is looked
 * up in that body with `getEmailFromText`.
 *
 * @param {Element} [post] - Container to search. When falsy, the first
 *   `div[role=dialog]` in the document is used instead.
 * @returns {Array<{id: (string|undefined), author_name: (string|undefined),
 *   comment: (string|undefined), email: string}>} One record per comment;
 *   empty when there is no container or no comment.
 */
function extractComments(post = undefined) {
  const parent = post ? post : document?.querySelector('div[role=dialog]')
  if (!parent) {
    return []
  }

  const comments = []
  for (const div of parent.getElementsByTagName('div')) {
    const ariaLabel = div.getAttribute('aria-label')
    if (!ariaLabel || !ariaLabel.startsWith('Comment by')) {
      continue
    }

    const textArray = traverseElements(div)
    const text = getTextFromComment(textArray)
    comments.push({
      // All text pieces joined together act as a de-duplication key.
      id: textArray?.join('-')?.toLowerCase(),
      // The first text piece is used as the author's name.
      author_name: textArray?.[0],
      comment: text,
      email: getEmailFromText(text),
    })
  }
  return comments
}
/**
 * Returns the feed entries that look like posts.
 *
 * Every direct child div of `div[role=feed]` is considered; an entry is kept
 * only when it contains an `h3` with non-empty text content (read elsewhere
 * in this script as the poster's name).
 *
 * @returns {Element[]} Matching entries, in document order.
 */
function getAllPosts() {
  const feedEntries = document.querySelectorAll('div[role=feed] > div')
  return Array.from(feedEntries).filter((post) =>
    Boolean(post?.querySelector('h3')?.textContent),
  )
}
/**
 * Pauses an async flow for a while.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Settles once the delay has passed.
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms)
  })
/**
 * Looks up the first `div[aria-label="Close"]` in the document and branches
 * on whether it was found.
 *
 * NOTE(review): this block is incomplete as captured. The body of the
 * not-found guard, whatever is done with `closeButton` afterwards, and the
 * closing braces are missing. The name suggests the button is clicked to
 * dismiss the open dialog — confirm against the original script.
 */
function closeDialog() {
const closeButton = document?.querySelector('div[aria-label="Close"]')
if (!closeButton) {
/**
 * Reads the text of a post: locates an element inside `post` with a long
 * positional selector, then joins the text pieces returned by
 * `traverseElements` with single spaces.
 *
 * NOTE(review): this block is incomplete as captured. The `querySelector`
 * call is not closed, the body of the `if (!postText)` guard is missing, and
 * the closing braces are absent.
 * NOTE(review): the selector depends on exact nesting depth and
 * `nth-child` positions, so it presumably breaks whenever the page markup
 * changes.
 *
 * @param {Element} post - Post container to read from.
 * @returns {string} Post text joined by spaces. The value returned when the
 *   element is not found is not visible here.
 */
function getPostText(post) {
let postText = post?.querySelector(
'div > div > div > div > div > div > div > div > div > div:nth-child(8) > div > div > div:nth-child(3) > div',
// const postText = post?.querySelector('div[data-ad-preview="message"]'); // this doesn't work for all of them for some reason
if (!postText) {
return traverseElements(postText)?.join(' ')
/**
 * Scans every `div[role="button"]` inside `post` for one whose trimmed text
 * content is exactly 'See more'.
 *
 * NOTE(review): this block is incomplete as captured. The action taken on a
 * matching div and the closing braces are missing. The name suggests the
 * div is clicked to expand the post text — confirm against the original
 * script.
 *
 * @param {Element} post - Post container to search within.
 */
function clickSeeMoreIfItsThere(post) {
const buttonDivs = post.querySelectorAll('div[role="button"]')
for (let i = 0; i < buttonDivs.length; i++) {
const div = buttonDivs[i]
// Check if the text content is "See more"
if (div.textContent.trim() === 'See more') {
// Perform actions on the matched div
/**
 * Builds output records for comments. Each visible record carries the parent
 * post's text under `post` and is flagged with `is_comment: true`.
 *
 * NOTE(review): this block is incomplete as captured. The expression after
 * `return` is garbled (presumably a map over `comments`), any remaining
 * record fields are missing, and the closing braces are absent — confirm
 * against the original script.
 *
 * @param {Object} post - Record of the parent post; only `post.post` is read
 *   in the visible lines.
 * @param {Array<Object>} comments - Comment records; their use is not
 *   visible here.
 */
function getAllCommentsAndFormat(post, comments) {
return => {
return {
post: post?.post,
is_comment: true,
/**
 * Builds an id string for a post. The visible part starts with `posterName`
 * with its spaces replaced by hyphens, followed by '-' and a value derived
 * from `postText` split on spaces.
 *
 * NOTE(review): this block is incomplete as captured. The template literal
 * is cut off after `?.split(' ')`, so how the post text is shortened or
 * joined is not visible, and the closing brace is absent.
 *
 * @param {string|undefined} posterName - Author name of the post.
 * @param {string|undefined} postText - Text of the post.
 * @returns {string}
 */
function getPostId(posterName, postText) {
return `${posterName?.split(' ')?.join('-')}-${postText
?.split(' ')
/**
 * Main scraping loop.
 *
 * Visible flow:
 *  - Fetch the current posts with `getAllPosts()` and walk them by index.
 *  - For each post: read the poster name from its `h3`, wait 1 s, read the
 *    post text, extract the comments inside the post itself, wait 1 s, then
 *    extract comments again with no argument (which makes `extractComments`
 *    look in the open dialog).
 *  - Build a post record (`id`, `is_post`, `author_name`, `first_name` and
 *    `last_name` taken from the first two space-separated parts of the name,
 *    `post`, `email`) and format the comments with
 *    `getAllCommentsAndFormat`.
 *  - While `scrolls > 0`, scroll down and replace `posts` with a fresh
 *    `getAllPosts()` result.
 *  - Afterwards, reduce `allContent` to `unique` using `uniqueIds`, log it,
 *    and export it via `createCSV` as 'facebookGroupPostAndComments.csv'.
 *
 * NOTE(review): this block is incomplete as captured. Among the missing
 * pieces are the statements around the two stray template-literal lines
 * (presumably `console.log` calls), any calls that open or close the
 * comments dialog, the arguments passed to `getAllCommentsAndFormat`, the
 * pushes into `allContent`, the increment of `i`, any update of `scrolls`,
 * the body of the de-duplication loop, and all closing braces.
 *
 * @returns {Promise<void>}
 */
async function run() {
const allContent = []
let posts = getAllPosts()
console.log('posts.length', posts.length)
let i = 0
while (i < posts.length) {
const post = posts[i]
`while you're waiting, why not check out 😅`,
const posterName = post?.querySelector('h3')?.textContent
console.log('posterName', posterName)
await sleep(1000)
const postText = getPostText(post)
const commentsDisplayedWithoutClicking = extractComments(post)
await sleep(1000)
const commentsAfterClickingModal = extractComments()
const content = {
id: getPostId(posterName, postText),
is_post: true,
author_name: posterName,
first_name: posterName?.split(' ')?.[0],
last_name: posterName?.split(' ')?.[1],
post: postText,
email: getEmailFromText(postText),
const comments = getAllCommentsAndFormat(content, [
if (scrolls > 0) {
await scrollDown()
const currentPosts = getAllPosts()
console.log('currentPosts', currentPosts.length)
posts = currentPosts
const unique = []
const uniqueIds = []
allContent.forEach((content) => {
if (!uniqueIds.includes( {
console.log('allContent', unique)
createCSV(unique, 'facebookGroupPostAndComments.csv')
`Congrats! 🎉 You scraped a sh*t ton of posts! If you need any custom scrapers built, email me:`,
// Script entry point.
// NOTE: to increase the number of posts, increase the "scrolls" variable below
// `run()` reads `scrolls` in its `scrolls > 0` check, so it must be declared
// before `run()` is invoked.
let scrolls = 5
// Top-level `await`: works when pasted into the browser console or loaded as
// an ES module, not in a classic script.
await run()
Copy link

How to use it

  1. Go to the Facebook Group you want to scrape
  2. Open the developer console (right-click > Inspect, then switch to the Console tab)
  3. Copy and paste the code above into the console
  4. Wait and watch the magic 🧙‍♂️

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment