Skip to content

Instantly share code, notes, and snippets.

@royshil
Last active October 23, 2023 19:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save royshil/504b3178f68d28b0cfe20b85af64d20d to your computer and use it in GitHub Desktop.
Save royshil/504b3178f68d28b0cfe20b85af64d20d to your computer and use it in GitHub Desktop.
Export Tweet as Image (screenshot embed)
import argparse
import html
import logging
import os
import subprocess
import sys
import urllib.parse
from pathlib import Path

import gradio as gr
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Command-line interface. NOTE: arguments are parsed at import time, so the
# module-level names `args` and `script_directory` exist as soon as the file
# is loaded.
parser = argparse.ArgumentParser(description="Create a screenshot from twitter embed")
# optional argument for multiple urls to get an image for (only used in headless mode)
parser.add_argument(
    "--urls", nargs="+", help="URLs to get images for (only used in headless mode)"
)
# allow running as gradio interface, add an argument to opt for headless mode
parser.add_argument(
    "--headless", action="store_true", help="Run in headless mode (no GUI)"
)
args = parser.parse_args()

# Make the logging.info() progress messages visible on the root logger.
logging.getLogger().setLevel(logging.INFO)

# Directory containing this script. Take the absolute path first: when the
# script is started by a relative name, __file__ can be relative and a bare
# dirname() would then be "", which turns "<dir>/embed_twitter.html" into a
# path at the filesystem root.
script_directory = os.path.dirname(os.path.abspath(__file__))
def _build_chrome_options(download_directory):
    """Return the Chrome options used for the headless screenshot browser.

    download_directory is stored as Chrome's default download directory pref.
    """
    options = webdriver.ChromeOptions()
    switches = (
        "--allow-insecure-localhost",
        "--disable-blink-features=AutomationControlled",
        "--disable-web-security",
        "--autoplay-policy=no-user-gesture-required",
        "--headless",
        # Chrome's switch for this is spelled "--disable-gpu".
        "--disable-gpu",
        "--window-size=1280,1280",
        "--mute-audio",
    )
    for switch in switches:
        options.add_argument(switch)
    options.add_experimental_option(
        "excludeSwitches", ["ignore-certificate-errors", "enable-automation"]
    )
    options.add_experimental_option("useAutomationExtension", False)
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_directory,
            "profile.default_content_setting_values.automatic_downloads": 2,
        },
    )
    return options


def _fetch_embed_code(driver, tweet_url):
    """Load the publish.twitter.com embed tool for tweet_url and return its HTML snippet."""
    logging.info("Loading embed tool page")
    # add the embed url to the query string, url-escape it
    driver.get("https://publish.twitter.com/?query=" + urllib.parse.quote(tweet_url))
    logging.info("Waiting for page to load")
    # until() raises TimeoutException when the element never appears, so no
    # separate "not found" check is needed afterwards.
    embed_code = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "EmbedCode-code"))
    )
    logging.info("Embed code loaded")
    # The snippet is shown HTML-escaped on the page; decode it back to markup.
    embed_code_html = html.unescape(embed_code.get_attribute("innerHTML"))
    logging.info("Embed code decoded")
    return embed_code_html


def _capture_embed(driver, page_uri, screenshot_path):
    """Open page_uri, wait for the embed iframe and its images, then save a screenshot."""
    driver.get(page_uri)
    logging.info("Rendering tweet embed...")
    # log the current html
    logging.info(driver.page_source)
    # wait until the iframe is present
    iframe = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "iframe"))
    )
    driver.switch_to.frame(iframe)
    # wait until the <img> elements inside the iframe are present in the DOM
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "img"))
    )
    # switch back to the main frame so the whole page is captured
    driver.switch_to.default_content()
    logging.info("Done.")
    # save_screenshot() reports write failures through its return value.
    if not driver.save_screenshot(screenshot_path):
        raise OSError(f"Could not write screenshot to {screenshot_path}")


def get_photo_from_tweet(tweet_url):
    """Render a tweet via Twitter's embed tool and return it as a trimmed image.

    The tweet URL is passed to publish.twitter.com to obtain the embed
    snippet, the snippet is written to a temporary HTML page next to this
    script, that page is screenshotted in headless Chrome, and ImageMagick's
    ``convert -trim`` crops the surrounding whitespace. The result is written
    to ``<sanitized tweet url>.png`` in the current working directory.

    Args:
        tweet_url: URL of the tweet (``x.com`` URLs are rewritten to
            ``twitter.com``).

    Returns:
        The trimmed screenshot opened as a ``PIL.Image.Image``.

    Raises:
        ValueError: If ``tweet_url`` is empty or blank.
        selenium.common.exceptions.TimeoutException: If the embed code, the
            iframe or its images do not appear within 10 seconds.
        OSError: If the screenshot cannot be written.
        subprocess.CalledProcessError: If ImageMagick ``convert`` fails.
    """
    if not tweet_url or not tweet_url.strip():
        raise ValueError("tweet_url must be a non-empty tweet URL")

    # replace x.com with twitter.com
    url_for_embed_tool = tweet_url.strip().replace("x.com", "twitter.com")

    # rename url_for_embed_tool to a file-safe name: drop the site prefix and
    # replace any non alphanumeric characters with underscores
    # (https://stackoverflow.com/a/295146/149721)
    output_filename = url_for_embed_tool.replace("https://twitter.com/", "")
    output_filename = "".join(c if c.isalnum() else "_" for c in output_filename)

    # Resolve to an absolute directory so the file:// URI below is valid.
    work_dir = Path(script_directory).resolve()
    embed_page = work_dir / "embed_twitter.html"

    logging.info("Create environment")
    driver = webdriver.Chrome(options=_build_chrome_options(str(work_dir)))
    try:
        embed_code_html = _fetch_embed_code(driver, url_for_embed_tool)

        logging.info("Create embed code HTML")
        # The file is written as UTF-8 (tweets routinely contain non-ASCII
        # text), so declare that charset for the browser as well.
        embedding_html = f"""<html>
<head>
<meta charset="utf-8">
</head>
<body>
{embed_code_html}
</body>
</html>
"""
        embed_page.write_text(embedding_html, encoding="utf-8")
        try:
            _capture_embed(driver, embed_page.as_uri(), "screenshot.png")
        finally:
            # delete the embed code file even when rendering failed
            embed_page.unlink()
    finally:
        # Always shut the browser down, including on timeouts.
        driver.quit()

    logging.info("Converting...")
    # trim whitespace with imagemagick; an argument list avoids the shell and
    # check=True surfaces a failed conversion instead of ignoring it
    subprocess.run(
        ["convert", "-trim", "screenshot.png", f"{output_filename}.png"],
        check=True,
    )
    logging.info("Done.")

    # read the trimmed screenshot with PIL
    return Image.open(f"{output_filename}.png")
if __name__ == "__main__":
    # Script entry point: serve the Gradio UI by default, or with --headless
    # process every URL given through --urls.
    if not args.headless:
        # interactive mode - build Gradio interface
        interface_settings = {
            "fn": get_photo_from_tweet,
            "inputs": "text",
            "outputs": "image",
            "title": "Twitter Embed Screenshot",
            "description": "Create a screenshot from a Twitter embed",
            "allow_flagging": False,
            "allow_screenshot": False,
            "allow_download": False,
        }
        iface = gr.Interface(**interface_settings)
        iface.launch()
    elif args.urls is None:
        # headless mode needs at least one URL to work on
        logging.error("URL not specified")
        sys.exit(1)
    else:
        # headless mode: one output image per URL
        for tweet_link in args.urls:
            get_photo_from_tweet(tweet_link)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment