A basic Selenium script to resolve prematurely skipped media on TubeSync.
import logging

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from tqdm import tqdm

# CONFIG
MAX_PAGES = 10
SERVER_URL = "http://192.168.0.3:4848"

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Create a new instance of the Firefox driver
driver = webdriver.Firefox()

# Set up a wait object with a 10-second timeout (default polling frequency)
wait = WebDriverWait(driver, 10)  # waits up to 10 seconds before raising a TimeoutException

# Initialize a list to store the URLs
urls = []

try:
    logging.info("Starting to scrape URLs from pages.")

    # Iterate through the first MAX_PAGES pages of skipped media
    for page in range(1, MAX_PAGES + 1):
        # Append the page parameter to the base URL
        page_url = f"{SERVER_URL}/media?only_skipped=yes&page={page}"

        # Log the current page being processed
        logging.info(f"Scraping URLs from {page_url}")

        # Navigate to the page URL
        driver.get(page_url)

        # Locate the media card links and extract their href attributes
        elements = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.card.mediacard a")))
        urls.extend([element.get_attribute('href') for element in elements])

    # Log the total number of URLs found
    logging.info(f"Total URLs found: {len(urls)}")

    # Log the start of URL processing
    logging.info("Starting to process URLs.")

    # Iterate over each URL with a tqdm progress bar
    for url in tqdm(urls, unit='url'):
        try:
            # Log the current URL being processed
            logging.info(f"Processing URL {url}")

            # Navigate directly to the URL
            driver.get(url)

            # Perform the sequence of button clicks that un-skips the media item
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn.delete-button"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn"))).click()
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()
        except Exception as e:
            # Log the error and continue with the next URL
            logging.error(f"Error processing URL {url}: {e}")
            continue
finally:
    # Log the end of the script
    logging.info("Finished processing URLs. Closing the browser.")

    # Close the browser window
    driver.quit()
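The script as written drives a visible Firefox window and assumes a geckodriver binary is available on your PATH. If you would rather run it unattended (for example on the same host as TubeSync), Selenium's Firefox options support headless mode; the snippet below is a minimal sketch, assuming Selenium 4, that could replace the `driver = webdriver.Firefox()` line above.

from selenium.webdriver.firefox.options import Options

options = Options()
options.add_argument("--headless")  # run Firefox without opening a window
driver = webdriver.Firefox(options=options)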