import json
import os
import random
import sys
import time
import urllib

try:
    from urllib import quote_plus  # Python 2
except ImportError:
    from urllib.parse import quote_plus  # Python 3

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait

# Load data file from disk; the context manager closes the handle promptly.
with open('livestream-data.json') as data_file:
    data = json.load(data_file)

# Becomes True once any page entry has been changed, so the file on disk is
# only rewritten when there is something new to save.
updated = False

# iframe markup for an embedded video; {URL_VIDEO} is replaced with the
# URL-quoted video link.
fb_embed_template = """<iframe src="https://www.facebook.com/plugins/video.php?href={URL_VIDEO}&show_text=1&width=560" width="560" height="382" style="border:none;overflow:hidden" scrolling="no" frameborder="0" allowTransparency="true" allow="encrypted-media" allowFullScreen="true"></iframe>"""


def _strip_query(url):
    """Return `url` with everything from the first "?" onwards removed."""
    return url.split("?", 1)[0]


def _find_live_video_url(driver, page):
    """Return the href of the last live video link on the loaded page.

    Scans every <a> inside each "userContentWrapper" element of the page
    currently loaded in `driver`. A link counts as live when its href
    contains "video" and the innerHTML of its fifth ancestor contains the
    text "is live now". Each hit is printed together with page['name'].

    Errors raised while inspecting a single link are printed and that link
    is skipped, so one bad element does not abort the scan.

    Returns None when no live video link is found.
    """
    live_url = None

    # Loop over all userContent, trying to find an <a> tag
    for block in driver.find_elements_by_class_name("userContentWrapper"):

        # Loop over all <a> tags, trying to find video
        for anchor in block.find_elements_by_tag_name("a"):
            try:
                href = anchor.get_attribute("href")

                # get_attribute returns None for anchors without an href;
                # skip those instead of failing on the substring test.
                if not href or 'video' not in href:
                    continue

                # Found a video, but need to check it's live: look for the
                # marker text in the markup five levels above the link.
                container = anchor.find_element_by_xpath('../../../../..')
                if 'is live now' not in container.get_attribute("innerHTML"):
                    continue

                # Found a live video
                print("LIVE VIDEO %s %s" % (page['name'], href))
                live_url = href

            except Exception as exc:
                # Best effort: report the problem and move on to the next link.
                print("EXCEPTION %s" % (exc,))

    return live_url


for page in data['pages']:

    with webdriver.Firefox() as driver:
        driver.get(page['facebook_url'])

        live_url = _find_live_video_url(driver, page)

        if live_url is not None:
            page['facebook_live_url'] = _strip_query(live_url)
            page['facebook_live_last_updated_utc'] = time.time()
            page['facebook_live_embed'] = fb_embed_template.replace(
                "{URL_VIDEO}", quote_plus(page['facebook_live_url']))
            updated = True

        # Scroll down the page a bit
        driver.execute_script(
            "window.scrollTo(0, %d)" % random.randint(1000, 5000))

        # Keep the browser window open a short while before closing it
        time.sleep(random.randint(9, 25))


# Update file on disk
if updated:
    data['last_updated'] = time.time()

    # Serialise first: opening with 'w' truncates the file, so a failure in
    # json.dumps must happen before the existing data is thrown away.
    serialized = json.dumps(data, sort_keys=True, indent=2)

    with open('livestream-data.json', 'w') as f:
        f.write(serialized)