Skip to content

Instantly share code, notes, and snippets.

@barakplasma
Last active December 30, 2023 19:36
Show Gist options
  • Save barakplasma/34e8edf1640a4265479e9183fba38e47 to your computer and use it in GitHub Desktop.
Save barakplasma/34e8edf1640a4265479e9183fba38e47 to your computer and use it in GitHub Desktop.
Scrape Facebook Groups for Apartments using Python
import os
import re
import sqlite3
import argparse
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from facebook_scraper import get_posts
# --- SQLite setup -----------------------------------------------------------
# Open (or create) the local database file that stores the scraped posts.
conn = sqlite3.connect('posts.db')
# Hand rows back as sqlite3.Row objects instead of plain tuples.
conn.row_factory = sqlite3.Row
c = conn.cursor()
# Make sure the posts table exists; post_id is its primary key.
c.execute(
    "CREATE TABLE IF NOT EXISTS posts ("
    "post_text text, "
    "price real, "
    "number_of_rooms INTEGER, "
    "image_url text, "
    "post_id text PRIMARY KEY)"
)
class FacebookPost:
    """A single scraped Facebook group post plus the details parsed from it.

    ``price`` and ``number_of_rooms`` are passed as ``None`` when they are
    not known yet; they are filled in later from the post text.
    """

    def __init__(self, post_text: str, price: int, image_url: str, post_id: str, number_of_rooms: int):
        self.post_text = post_text
        self.price = price
        self.image_url = image_url
        self.post_id = post_id
        self.number_of_rooms = number_of_rooms

    def __repr__(self):
        # Leave the (potentially long) post text out to keep the repr readable.
        return (
            f"{type(self).__name__}(post_id={self.post_id!r}, "
            f"price={self.price!r}, number_of_rooms={self.number_of_rooms!r})"
        )
def save_post(post: FacebookPost):
    """Insert ``post`` into the ``posts`` table, replacing a row with the same ``post_id``.

    The statement runs on the module-level cursor ``c``. Nothing is committed
    here; the caller is responsible for committing the connection.
    """
    # Columns are named explicitly so the insert does not depend on the
    # physical column order of the table.
    c.execute(
        "REPLACE INTO posts "
        "(post_text, price, number_of_rooms, image_url, post_id) "
        "VALUES (?,?,?,?,?)",
        (
            post.post_text,
            post.price,
            post.number_of_rooms,
            post.image_url,
            post.post_id,
        ),
    )
def get_posts_from_db():
    """Return every row of the ``posts`` table as a list of FacebookPost objects."""
    # Select the columns by name instead of ``SELECT *`` so the mapping below
    # does not depend on the physical column order of the table.
    rows = c.execute(
        "SELECT post_text, price, number_of_rooms, image_url, post_id FROM posts"
    )
    return [
        FacebookPost(
            post_text=row[0],
            price=row[1],
            number_of_rooms=row[2],
            image_url=row[3],
            post_id=row[4],
        )
        for row in rows
    ]
# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser(description='Scrape Facebook for posts.')

# Boolean switches selecting what the script should do.
for _switch in ('--scrape', '--display', '--plot'):
    parser.add_argument(_switch, action='store_true')

# Integer filter bounds together with their default values.
for _option, _default in (
    ('--minimum_price', 5000),
    ('--maximum_price', 6000),
    ('--minimum_rooms', 3),
    ('--maximum_rooms', 3),
):
    parser.add_argument(_option, action='store', type=int, default=_default)

args = parser.parse_args()
def pretty_print(post: FacebookPost):
    """Print a post to stdout: a header, its price, its room count (if found) and its text."""
    print("\n-----------Post----------\n")
    print(f"\n PRICE: {post.price}\n")
    # Parse the room count once instead of running the regex twice.
    rooms = number_of_rooms(post)
    if rooms:
        print(f"\n number_of_rooms: {rooms}\n")
    print(post.post_text)
# A 4-digit amount with an optional thousands separator (e.g. "5500" or
# "5,500") that is not part of a longer run of digits. Look-arounds are used
# instead of consuming a neighbouring character, so an amount at the very
# start or end of the text is matched as well.
_PRICE_PATTERN = re.compile(r'(?<![0-9])([0-9],?[0-9]{3})(?![0-9])')


def price(post: FacebookPost):
    """Extract the first 4-digit price mentioned in the post text.

    Returns the price as a string of digits with any thousands separator
    removed (e.g. ``"5500"``), or ``None`` when the post has no text or no
    such amount appears in it.
    """
    text = post.post_text
    if not text:
        # Missing or empty text cannot contain a price.
        return None
    found = _PRICE_PATTERN.search(text)
    if found is None:
        return None
    return found.group(1).replace(',', '')
def append_price(post: FacebookPost):
    """Set ``post.price`` from the post text when a price can be found there.

    The post is modified in place; an existing price is kept when the text
    contains none.
    """
    found_price = price(post)
    if found_price:
        post.price = found_price
def within_price_budget(post: FacebookPost):
    """Return True when the post has a price inside the configured budget.

    The bounds come from the ``minimum_price`` and ``maximum_price`` entries
    of the module-level ``flags`` and are inclusive. A post without a price
    never passes.
    """
    if not post.price:
        return False
    return flags['minimum_price'] <= int(post.price) <= flags['maximum_price']
def right_size(post: FacebookPost):
    """Return True when the post's room count is inside the configured range.

    The bounds come from the ``minimum_rooms`` and ``maximum_rooms`` entries
    of the module-level ``flags`` and are inclusive. A post without a room
    count never passes.
    """
    if not post.number_of_rooms:
        return False
    return flags['minimum_rooms'] <= int(post.number_of_rooms) <= flags['maximum_rooms']
def has_text(post: FacebookPost):
    """Return True when the post text is longer than one character.

    A missing (``None``) text counts as no text instead of raising.
    """
    text = post.post_text
    return text is not None and len(text) > 1
# A room count followed by the Hebrew word for "rooms". The whole integer
# part is captured, and an optional fractional part (e.g. the ".5" in "3.5")
# is consumed but not captured, so half rooms are not misread as "5".
_ROOMS_PATTERN = re.compile(r'([0-9]+)(?:[.,][0-9]+)? חדרים')


def number_of_rooms(post: FacebookPost):
    """Extract the number of rooms mentioned in the post text.

    Returns the whole-room count as a string of digits (``"3"`` for both
    "3 rooms" and "3.5 rooms", since the rest of the script treats the count
    as an integer), or ``None`` when the post has no text or no room count
    appears in it.
    """
    text = post.post_text
    if not text:
        # Missing or empty text cannot contain a room count.
        return None
    found = _ROOMS_PATTERN.search(text)
    if found is None:
        return None
    return found.group(1)
def append_rooms(post: FacebookPost):
    """Set ``post.number_of_rooms`` from the post text when a room count can be found there.

    The post is modified in place; an existing room count is kept when the
    text contains none.
    """
    found_rooms = number_of_rooms(post)
    if found_rooms:
        post.number_of_rooms = found_rooms
def print_if_passes_filters(post: FacebookPost):
    """Pretty-print the post, but only when it passes every filter."""
    if passes_filters(post):
        pretty_print(post)
def display():
    """Print every stored post that passes the filters.

    Price and room count are re-derived from each post's text before the
    filters are applied.
    """
    for stored_post in get_posts_from_db():
        append_details_from_text(stored_post)
        print_if_passes_filters(stored_post)
def append_details_from_text(post: FacebookPost):
    """Fill in the post's price and room count from its text, in place."""
    append_price(post)
    append_rooms(post)
def passes_filters(post: FacebookPost):
    """Return True when the post has text, is within budget and has the right size."""
    # A short-circuit chain stops at the first failing filter instead of
    # evaluating all of them up front; the text check goes first.
    return bool(
        has_text(post)
        and within_price_budget(post)
        and right_size(post)
    )
# parse args to flags
flags = vars(args)

# Display-only mode: show the posts already stored in the database.
if flags['display'] and not flags['scrape']:
    display()

pages = 10
credentials = None # (os.getenv('fb_username'), os.getenv('fb_password'))
allGroups = itertools.chain(
    get_posts(group=253957624766723, pages=pages, credentials=credentials), # Apartments for rent in Ramat Gan
    get_posts(group=115046608513246, pages=pages, credentials=credentials), # Apartments by word of mouth in Givatayim
    get_posts(group=246902125410185, pages=pages, credentials=credentials), # Apartments for rent in Ramat Gan / Givatayim
)

# Plot mode: joint density of price vs. number of rooms for the stored posts
# that have both values.
if flags['plot']:
    sns.set(style="whitegrid")
    data = [
        (post.price, post.number_of_rooms)
        for post in get_posts_from_db()
        if (post.price and post.number_of_rooms)
    ]
    df = pd.DataFrame(data, columns=["price", "rooms"])
    sns.jointplot(x="price", y="rooms", data=df, kind="kde")
    plt.show()

try:
    # Scrape mode: store every scraped post, optionally printing the ones
    # that pass the filters as they arrive.
    if flags['scrape']:
        for post in allGroups:
            # A post without text is stored with an empty string so the
            # text parsers never receive None.
            currentFacebookPost = FacebookPost(
                post.get('text') or '',
                None,
                post.get('image'),
                post.get('post_id'),
                None,
            )
            append_details_from_text(currentFacebookPost)
            save_post(currentFacebookPost)
            if flags['display']:
                print_if_passes_filters(currentFacebookPost)
finally:
    # Save (commit) the changes even when scraping stops early with an
    # error, so the posts stored so far are not lost, then release the
    # cursor and the connection.
    conn.commit()
    c.close()
    conn.close()
facebook-scraper >= 0.2.3
seaborn
@barakplasma
Copy link
Author

@barakplasma
Copy link
Author

To run this without installing anything, try it out at https://repl.it/@barakplasma/findapartmentonfacebook

@barakplasma
Copy link
Author

Updated to graph the price-vs-rooms density, as seen below:
price-rooms

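Also updated to allow login based on the `fb_username` and `fb_password` environment variables (enable it by replacing `credentials = None` with the commented-out tuple on that line).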

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment