Skip to content

Instantly share code, notes, and snippets.

@JosiahBull
Created September 27, 2023 22:10
Show Gist options
  • Save JosiahBull/75eaf05dd74c709d12ea043548db072a to your computer and use it in GitHub Desktop.
Save JosiahBull/75eaf05dd74c709d12ea043548db072a to your computer and use it in GitHub Desktop.
A basic Selenium Script to resolve prematurely skipped media on TubeSync.
import logging

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
# CONFIG
# Upper bound on how many listing pages are scanned for skipped media.
MAX_PAGES = 10
# Base URL of the server being driven (no trailing slash; paths are appended).
SERVER_URL = "http://192.168.0.3:4848"

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Create a new instance of the Firefox driver
driver = webdriver.Firefox()

# Shared explicit wait: each wait.until() call blocks for up to 10 seconds
# before raising TimeoutException.
wait = WebDriverWait(driver, 10)

# Media URLs collected from the listing pages, in discovery order.
urls = []
# Tracks hrefs already queued so the same URL is never processed twice.
_seen_urls = set()

try:
    logging.info("Starting to scrape URLs from pages.")

    # Walk the listing pages 1..MAX_PAGES, stopping early once a page has no
    # media links.
    for page in range(1, MAX_PAGES + 1):
        page_url = f"{SERVER_URL}/media?only_skipped=yes&page={page}"
        logging.info(f"Scraping URLs from {page_url}")
        driver.get(page_url)

        try:
            elements = wait.until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.card.mediacard a"))
            )
        except TimeoutException:
            # No matching links appeared within the timeout. This happens when
            # there are fewer than MAX_PAGES pages of results; without this
            # guard the exception would abort the whole run before any of the
            # already-collected URLs were processed.
            logging.info(f"No media links found on page {page}; stopping pagination.")
            break

        # Read the hrefs immediately, while the elements still belong to the
        # currently loaded page.
        for element in elements:
            href = element.get_attribute('href')
            if href and href not in _seen_urls:
                _seen_urls.add(href)
                urls.append(href)

    logging.info(f"Total URLs found: {len(urls)}")
    logging.info("Starting to process URLs.")

    # Iterate over each URL with a tqdm progress bar
    for url in tqdm(urls, unit='url'):
        try:
            logging.info(f"Processing URL {url}")
            driver.get(url)

            # Four-step click sequence; each step waits until its target is
            # clickable. NOTE(review): presumably this deletes the media and
            # then re-enables it so it is no longer skipped -- confirm the
            # meaning of each button against the TubeSync UI.
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn.delete-button"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()
        except Exception as e:
            # Deliberate best-effort: one failing item must not stop the
            # batch, so log the error and move on to the next URL.
            logging.error(f"Error processing URL {url}: {e}")
finally:
    # Runs on success and on failure, so the browser is always closed.
    logging.info("Finished processing URLs. Closing the browser.")
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment