#! /usr/bin/env python3
# Author: Safwan Ljd
# Email: eljadisafwan at gmail dot com
# Version: 1.1
# Date: 2021-12-10
# License: GPL-3.0-or-later
# Forked From: [fb_likers_fetcher.py](https://gitlab.com/-/snippets/2478576)
#
#
# This is a fork of `fb_likers_fetcher.py` that includes phone numbers
# from the Facebook leak in the CSV file.
#
# This is a CLI tool that pulls the likers of any given Facebook post
# and saves their information in a CSV file.
#
# Run `python fb_likers_fetcher_with_phone.py --help` for help.
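#
# For example (with a hypothetical post URL):
#
#     python fb_likers_fetcher_with_phone.py -u "https://www.facebook.com/someuser/posts/1234567890" -n 100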
#
# Facebook requires you to be logged in to view the likers of
# posts, so you will need to specify your username and password
# using the DEFAULT_USERNAME and DEFAULT_PASSWORD variables below,
# or using the --username and --password flags. Alternatively, you
# can specify your Facebook cookie in the DEFAULT_COOKIE variable
# below, or using the --cookie flag.
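#
# For example (hypothetical credentials; either form works):
#
#     python fb_likers_fetcher_with_phone.py --username "someone@example.com" --password "hunter2" -u <post-url>
#     python fb_likers_fetcher_with_phone.py --cookie "xs=xxxxxxx; c_user=xxxxxxx" -u <post-url>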
#
# Note that if you're on Windows and have the `pycryptodomex` and
# the `pywin32` libraries installed, the script will try to fetch
# your cookie from Google Chrome automatically.
#
# For the phone number fetching, you need an SQLite database file
# named `facebook.db` with a table named `users` that has a column
# `id`, which is the ID of the Facebook account the number belongs
# to, and a column `phone`, which is the actual phone number.
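#
# For example, a minimal schema sketch with a hypothetical sample
# row (the script prepends a leading "0" when writing the number):
#
#     CREATE TABLE users (id INTEGER PRIMARY KEY, phone INTEGER);
#     INSERT INTO users (id, phone) VALUES (100001234567890, 912345678);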
#
# If you don't have the Facebook leak data, just use the original
# `fb_likers_fetcher.py`, which is way faster due to not having to
# deal with the database reading overhead.
#
#
# Libraries you need to Install:
# bs4
# lxml
# requests
# click
# pycryptodomex (if you're on Windows)
# pywin32 (if you're on Windows)
#
from bs4 import BeautifulSoup
import requests
import sqlite3
import click
import math
import sys
import re
import os

DEFAULT_USERNAME = ""
DEFAULT_PASSWORD = ""
DEFAULT_COOKIE = ""  # A String in This Format -> "xs=xxxxxxx; c_user=xxxxxxx"
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
CHUNK_SIZE = 50  # Don't Change This Unless You Know What You're Doing

class FacebookCrawler():
    def __init__(self, username: str = "", password: str = "", cookie: str = "", user_agent: str = DEFAULT_USER_AGENT):
        self.username = username
        self.password = password
        self.cookie = cookie
        self.user_agent = user_agent
        self.session = requests.Session()
        headers = {
            "User-Agent": self.user_agent,
            "Cookie": self.cookie,
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate"
        }
        self.session.headers.update(headers)

    def login(self) -> None:
        if not self.cookie:
            login_url = "https://m.facebook.com/login.php"
            response = self.session.get(login_url)
            soup = BeautifulSoup(response.text, features="lxml")
            data = {
                "lsd": soup.find(attrs={"name": "lsd"}).get("value"),
                "m_ts": soup.find(attrs={"name": "m_ts"}).get("value"),
                "li": soup.find(attrs={"name": "li"}).get("value"),
                "email": self.username,
                "pass": self.password,
                "login": "Log In",
                "version": 1
            }
            self.session.post(login_url, data=data)

    def get_likers_info_list(self, likers_page_soup: BeautifulSoup, ids_list: list = [], likers_num: int = 0) -> list:
        likers = likers_page_soup.find_all(attrs={"class": "_4mn c"})
        likers_num = likers_num or len(likers)
        likers_info_list = []
        for i in range(likers_num):
            a_tag = likers[i].find("a")
            account_endpoint = a_tag.get("href")
            user_id = ids_list[i] if ids_list else self.__get_id_from_endpoint(endpoint=account_endpoint)
            username = self.__get_username_from_endpoint(endpoint=account_endpoint)
            phone = get_phone_num_from_id(user_id)
            name = a_tag.text
            name_split = name.split(" ")
            first_name = name_split[0]
            last_name = name_split[-1]
            user_info = {
                "user_id": user_id,
                "username": username,
                "first_name": first_name,
                "last_name": last_name,
                "phone": phone
            }
            likers_info_list.append(user_info)
        return likers_info_list

    def get_total_likes(self, likers_page_soup: BeautifulSoup) -> int:
        matches = re.findall(r"total_count=[0-9]*", str(likers_page_soup))
        try:
            return int(re.sub(r"[^0-9]", "", matches[0]))
        except Exception:
            sys.exit("[*] Unable to Obtain Post Likes Count!\n[*] Check Your Credentials and Make Sure The Link is Valid...")

    def get_likers_page_soup(self, likers_page_url: str) -> BeautifulSoup:
        response = self.session.get(likers_page_url)
        return BeautifulSoup(self.__clean_html(response.text), features="lxml")

    def get_likers_page_url(self, post_id: str) -> str:
        return "https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier=" + post_id

    def get_next_likers_page_url(self, prev_likers_page_soup: BeautifulSoup) -> str:
        endpoint = prev_likers_page_soup.find(attrs={"class": "touchable primary"}).get("href")
        return "https://m.facebook.com" + endpoint

    def get_ids_list_from_next_likers_page_url(self, next_likers_page_url: str) -> list:
        return next_likers_page_url.split("shown_ids=")[-1].split("&")[0].split("%2C")[:CHUNK_SIZE]
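
    # For reference, a hypothetical "next page" URL like
    #     .../browser/?ft_ent_identifier=123&shown_ids=111%2C222%2C333&total_count=95
    # would make the method above return ["111", "222", "333"].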

    def get_post_id(self, post_url: str) -> str:
        matches = re.findall(r"posts?[/:]\s?[0-9]*", post_url)
        if not matches:
            sys.exit("[*] Invalid Post URL")
        return re.sub(r"[^0-9]", "", matches[0])

    def __get_username_from_endpoint(self, endpoint: str) -> str:
        username = ""
        try:
            username = endpoint[1:].split("?")[0] if (not re.findall(r"\?id=[0-9]*", endpoint)) else username
        finally:
            return username

    def __get_id_from_endpoint(self, endpoint: str) -> str:
        user_id = ""
        try:
            id_section = re.findall(r"\?id=[0-9]*", endpoint)
            if id_section:
                # re.findall() returns a list, so take the first match
                user_id = id_section[0].replace("?id=", "")
            else:
                response = self.session.get("https://m.facebook.com" + endpoint)
                matches = re.findall(r"entity_id:[0-9]*", response.text)
                matches = matches or re.findall(r"currentProfileID:[0-9]*", response.text)
                matches = matches or re.findall(r"\"entity_id\":\"[0-9]*\"", response.text)
                matches = matches or re.findall(r"\"userID\":\"[0-9]*\"", response.text)
                for match in matches:
                    user_id = re.sub(r"[^0-9]", "", match)
                    if user_id:
                        break
        finally:
            return user_id

    def __clean_html(self, html_text: str) -> str:
        html_text = re.sub(r"\\[^u\"/]", "", html_text)
        html_text = html_text.replace("\\\"", "\"")
        html_text = html_text.replace("\\/", "/")
        if "<" not in html_text:
            html_text = html_text.encode().decode("unicode_escape")
        return html_text


def create_csv_file(file_name: str = "", file_num: int = 1, prefix: str = "users-", ext: str = ".csv") -> str:
    while not file_name:
        current_file_name = prefix + str(file_num) + ext
        if not os.path.exists(current_file_name):
            file_name = current_file_name
        else:
            file_num += 1
    file_name = (file_name + ext) if (file_name[-len(ext):].lower()) != ext else file_name
    with open(file_name, "wb") as file:
        file.write(b"\xEF\xBB\xBF")  # UTF-8 BOM so spreadsheet apps detect the encoding
    with open(file_name, "a", encoding="UTF-8") as file:
        file.write("id,username,fn,ln,phone,email,url\n")
    return file_name


def append_to_csv_file(likers_info_list: list, file_name: str) -> None:
    with open(file_name, "a", encoding="UTF-8") as file:
        for user_info in likers_info_list:
            user_id = user_info["user_id"]
            username = user_info["username"]
            first_name = user_info["first_name"]
            last_name = user_info["last_name"]
            phone = user_info["phone"]
            email = ""
            url = ""
            identifier = username or user_id
            if identifier:
                email = identifier + "@facebook.com"
                url = "https://www.facebook.com/" + identifier
            file.write(",".join([user_id, username, first_name, last_name, phone, email, url]) + "\n")
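
# A resulting CSV row might look like this (entirely hypothetical data):
#     100001234567890,john.doe,John,Doe,0912345678,john.doe@facebook.com,https://www.facebook.com/john.doe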


def get_phone_num_from_id(user_id: str) -> str:
    phone_num = ""
    try:
        # fb_db_cur is the module-level cursor opened in the __main__ block below
        fb_db_cur.execute("SELECT (phone) FROM users WHERE id = ?", (int(user_id),))
        phone_num = fb_db_cur.fetchone()
        phone_num = "0" + str(phone_num[0])
    finally:
        return phone_num


def get_chrome_fb_cookie() -> str:
    fb_cookie_str = ""
    try:
        from Cryptodome.Cipher.AES import new, MODE_GCM
        from win32.win32crypt import CryptUnprotectData
        from base64 import b64decode
        import json

        local_appdata = os.path.expandvars("%LOCALAPPDATA%")
        cookies_file = local_appdata + "/Google/Chrome/User Data/Default/Cookies"
        local_state_file = local_appdata + "/Google/Chrome/User Data/Local State"
        # Chrome (v80+) stores cookies AES-GCM encrypted; the AES key itself
        # is DPAPI-protected in the "Local State" file
        with open(local_state_file) as local_state:
            key = CryptUnprotectData(b64decode(json.load(local_state)["os_crypt"]["encrypted_key"])[5:])[1]
        with sqlite3.connect(cookies_file) as conn:
            conn.create_function("decrypt", 1, lambda v: new(key, MODE_GCM, v[3:15]).decrypt(v[15:-16]).decode())
            # Parenthesized so the OR doesn't also match c_user cookies from other hosts
            fb_cookies_dict = dict(conn.execute("SELECT name, decrypt(encrypted_value) FROM cookies WHERE host_key = '.facebook.com' AND (name = 'xs' OR name = 'c_user')"))
        fb_cookie_str = "xs=" + fb_cookies_dict["xs"] + "; c_user=" + fb_cookies_dict["c_user"]
        print("Fetched Facebook Cookie From Google Chrome!")
        print("Cookie: \"" + fb_cookie_str + "\"\n")
    finally:
        return fb_cookie_str


def fetch_likers(crawler: FacebookCrawler, url: str, number: int, file_name: str) -> None:
    post_id = crawler.get_post_id(post_url=url)
    likers_page_url = crawler.get_likers_page_url(post_id=post_id)
    soup = crawler.get_likers_page_soup(likers_page_url=likers_page_url)
    total_likes = crawler.get_total_likes(likers_page_soup=soup)
    number = number if (number and number < total_likes) else total_likes
    chunks = math.ceil(number / CHUNK_SIZE)
    file_name = create_csv_file(file_name=file_name)

    header = "==================== Post ID: " + post_id + " ===================="
    footer = "=" * len(header)
    print("\n" + header)
    print("Output File:\t" + file_name)
    print("Total likes:\t" + str(total_likes))
    print("Fetching:\t" + str(number))
    print("Chunks:\t\t" + str(chunks))
    print("\nFetching...")

    try:
        for chunk_num in range(chunks):
            print("[" + "{:.2f}".format(chunk_num / chunks * 100) + "%" + " Done]\tFetching chunk " + str(chunk_num + 1) + " out of " + str(chunks), end="...\r")
            soup = crawler.get_likers_page_soup(likers_page_url=next_likers_page_url) if (chunk_num > 0) else soup
            try:
                next_likers_page_url = crawler.get_next_likers_page_url(prev_likers_page_soup=soup)
                ids_list = crawler.get_ids_list_from_next_likers_page_url(next_likers_page_url=next_likers_page_url)
            except Exception:
                ids_list = []
            append_to_csv_file(likers_info_list=crawler.get_likers_info_list(likers_page_soup=soup, ids_list=ids_list, likers_num=(len(ids_list) or (number % CHUNK_SIZE))), file_name=file_name)
        print("[100.00% Done]\t" + str(chunks) + " out of " + str(chunks) + " chunks were fetched!")
        print(footer + "\n")
    except Exception as exception:
        print("\n[*] Something Went Wrong!!\n[*] Your Account MIGHT Have Been Temporarily Blacklisted By Facebook\n[*] Technical Details: " + str(exception), file=sys.stderr)


@click.option("-o", "--output", help="The name of the CSV file, it defaults to \"users-1.csv\"")
@click.option("--password", help="The password of your Facebook account")
@click.option("--username", help="The username of your Facebook account")
@click.option("--cookie", help="The cookie of your Facebook account")
@click.option("-n", "--number", help="Maximum number of likers to fetch")
@click.option("-f", "--file", help="A file containing posts you want to fetch the likers of")
@click.option("-u", "--url", help="The URL of the post you want to fetch the likers of")
@click.command()
def fetcher(url: str, file: str, number: int, cookie: str, username: str, password: str, output: str) -> None:
    """A Tool That Fetches The Likers of Any Given Facebook Post and Saves Them in a CSV File"""
    if not url and not file:
        sys.exit("[*] You Must Specify Either a URL [--url/-u] or a file [--file/-f]")

    posts_urls = []
    if file:
        output and print("[*] Ignoring the Output File Specified Because [--file/-f] Was Used\n", file=sys.stderr)
        output = ""
        with open(file, "r") as posts_file:
            posts_urls = posts_file.readlines()
        print("Total Posts: " + str(len(posts_urls)))
    if url:
        posts_urls.insert(0, url)

    cookie = cookie or DEFAULT_COOKIE or get_chrome_fb_cookie()
    username = username or DEFAULT_USERNAME
    password = password or DEFAULT_PASSWORD
    number = number and int(number)

    crawler = FacebookCrawler(username=username, password=password, cookie=cookie)
    crawler.login()
    for post_url in posts_urls:
        post_url = post_url.strip()
        if post_url and post_url[0] not in ["#", ";"]:
            fetch_likers(crawler=crawler, url=post_url, number=number, file_name=output)


if __name__ == "__main__":
    try:
        with sqlite3.connect('./facebook.db') as fb_db_conn:
            fb_db_cur = fb_db_conn.cursor()
            fetcher(prog_name="fetcher")
    except Exception as exception:
        print("[*] Something Went Wrong!!\n[*] Technical Details: " + str(exception), file=sys.stderr)
    except KeyboardInterrupt:
        print("\nAborted!")

# This was moved over to GitLab: https://gitlab.com/-/snippets/2478577
