Skip to content

Instantly share code, notes, and snippets.

@anthonyeden
Last active June 27, 2021 12:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anthonyeden/38b3c4537ad37f2a3bc48aebca0b6a06 to your computer and use it in GitHub Desktop.
Save anthonyeden/38b3c4537ad37f2a3bc48aebca0b6a06 to your computer and use it in GitHub Desktop.
Facebook Live Embed Scraper
This script allows you to scrape a Facebook page for live videos and generate an iframe embed code for your website. It uses the Selenium Firefox web driver. You may get banned if you run this too often - whether that happens in practice is yet to be seen.
The file `livestream-data.json` will be updated with extra fields if/when a live video is found.
{
"last_updated": 1584600828.746,
"pages": [
{
"facebook_url": "https://www.facebook.com/newslivetvofficial/",
"name": "Test FB Page",
"website": "https://example.com/"
}
]
}
import os
import sys
import time
import random
import json
import urllib
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
# Scrape every Facebook page listed in the data file for a post whose markup
# contains "is live now". When one is found, store the video URL, a timestamp
# and an iframe embed code on that page's entry, then rewrite the data file.

# quote_plus lives on `urllib` in Python 2 and on `urllib.parse` in Python 3;
# resolve it here so the script runs unchanged under either interpreter.
try:
    from urllib import quote_plus
except ImportError:
    from urllib.parse import quote_plus

DATA_FILE = 'livestream-data.json'

# Number of parent elements to climb from a video link before looking for the
# "is live now" text in the surrounding markup.
LIVE_MARKER_ANCESTOR_DEPTH = 5

# Load data file from disk; the context manager closes the handle right away.
with open(DATA_FILE) as f:
    data = json.load(f)

updated = False

fb_embed_template = """<iframe src="https://www.facebook.com/plugins/video.php?href={URL_VIDEO}&show_text=1&width=560" width="560" height="382" style="border:none;overflow:hidden" scrolling="no" frameborder="0" allowTransparency="true" allow="encrypted-media" allowFullScreen="true"></iframe>"""

for page in data['pages']:
    # One browser session per page, released when the with-block exits.
    with webdriver.Firefox() as driver:
        driver.get(page['facebook_url'])

        # Loop over all userContent, trying to find an <a> tag
        for block in driver.find_elements(By.CLASS_NAME, "userContentWrapper"):

            # Loop over all <a> tags, trying to find video
            for anchor in block.find_elements(By.TAG_NAME, "a"):
                try:
                    href = anchor.get_attribute("href")

                    # Anchors without an href yield None; skip them instead of
                    # letting the substring test raise a TypeError.
                    if not href or 'video' not in href:
                        continue

                    # Found a video, but need to check it's live: climb to an
                    # enclosing container and inspect its markup.
                    container = anchor
                    for _ in range(LIVE_MARKER_ANCESTOR_DEPTH):
                        container = container.find_element(By.XPATH, '..')

                    if 'is live now' not in container.get_attribute("innerHTML"):
                        continue

                    # Found a live video
                    print("LIVE VIDEO %s %s" % (page['name'], href))

                    # Drop the query string so only the bare video URL is kept.
                    live_url = href.split("?", 1)[0]

                    page['facebook_live_url'] = live_url
                    page['facebook_live_last_updated_utc'] = time.time()
                    page['facebook_live_embed'] = fb_embed_template.replace(
                        "{URL_VIDEO}", quote_plus(live_url))
                    updated = True

                except Exception as exc:
                    # Best effort: one problematic link must not abort the
                    # scan of the remaining links and pages.
                    print("EXCEPTION %s" % (exc,))

        # Scroll down the page a bit
        driver.execute_script(
            "window.scrollTo(0, %d)" % random.randint(1000, 5000))

        # Keep the browser window open a short while before closing it
        time.sleep(random.randint(9, 25))

# Update file on disk, but only when a live video was actually recorded.
if updated:
    data['last_updated'] = time.time()

    with open(DATA_FILE, 'w') as f:
        f.write(json.dumps(data, sort_keys=True, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment