# JosiahBull/fix_skipped.py

## fix_skipped.py
import logging

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm

# CONFIG
MAX_PAGES = 10
SERVER_URL = "http://192.168.0.3:4848"
WAIT_TIMEOUT_SECONDS = 10  # each explicit wait gives up after this many seconds

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def dedupe_links(hrefs):
    """Return the usable links from *hrefs* with repeats removed.

    Args:
        hrefs: Iterable of href values; entries may be ``None`` or empty
            (an anchor without an ``href`` attribute yields ``None``).

    Returns:
        A list of the non-empty links in first-seen order, each appearing
        exactly once.
    """
    # dict.fromkeys keeps insertion order, so this de-duplicates without
    # reshuffling the order in which the links were scraped.
    return list(dict.fromkeys(href for href in hrefs if href))


def collect_skipped_urls(driver, wait, max_pages=MAX_PAGES, server_url=SERVER_URL):
    """Scrape the media-card links from the paginated "only skipped" listing.

    Args:
        driver: Selenium WebDriver used to load each listing page.
        wait: WebDriverWait bound to *driver*.
        max_pages: Highest page number to request (pages start at 1).
        server_url: Base URL of the server, without a trailing slash.

    Returns:
        De-duplicated list of link targets found in the media cards.
    """
    hrefs = []

    for page in range(1, max_pages + 1):
        page_url = f"{server_url}/media?only_skipped=yes&page={page}"
        logging.info(f"Scraping URLs from {page_url}")
        driver.get(page_url)

        try:
            elements = wait.until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.card.mediacard a"))
            )
        except TimeoutException:
            # A page without any media card never satisfies the wait. Treat it
            # as the end of the listing instead of letting the exception abort
            # the whole run before a single URL has been processed.
            logging.info(f"No media cards found on page {page}; stopping pagination.")
            break

        hrefs.extend(element.get_attribute('href') for element in elements)

    return dedupe_links(hrefs)


def process_url(driver, wait, url):
    """Open *url* and click through the four-step button sequence.

    The sequence is: the delete button, a submit button, the first clickable
    ``a.btn`` link, then a submit button again.
    NOTE(review): presumably this deletes the media item and then re-triggers
    it -- confirm against the server UI.

    Raises:
        Whatever Selenium raises when navigation fails or a button does not
        become clickable within the wait timeout; the caller decides how to
        handle it.
    """
    logging.info(f"Processing URL {url}")
    driver.get(url)

    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn.delete-button"))).click()
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.btn"))).click()
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn[type='submit']"))).click()


def main():
    """Scrape every skipped-media URL, then run the click sequence on each."""
    # Create a new instance of the Firefox driver
    driver = webdriver.Firefox()

    try:
        wait = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS)

        logging.info("Starting to scrape URLs from pages.")
        urls = collect_skipped_urls(driver, wait)
        logging.info(f"Total URLs found: {len(urls)}")

        logging.info("Starting to process URLs.")
        for url in tqdm(urls, unit='url'):
            try:
                process_url(driver, wait, url)
            except Exception as e:
                # Best effort by design: one failing item must not stop the
                # remaining URLs from being processed.
                logging.error(f"Error processing URL {url}: {e}")
    finally:
        logging.info("Finished processing URLs. Closing the browser.")

        # Always close the browser, even when scraping or processing failed.
        driver.quit()


if __name__ == "__main__":
    main()