Created
August 20, 2023 13:31
-
-
Save zyryc/ca83344e0c27c4ff420ece89f10842da to your computer and use it in GitHub Desktop.
Fetch wallpapers from the internet and download them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.chrome.options import Options | |
from bs4 import BeautifulSoup | |
import time | |
import os | |
# Pagination URLs already crawled; lets scrape_website() skip pages it has
# seen and so prevents infinite recursion over circular pagination links.
Visited_pages = set()
def get_href_values(url):
    """Load *url* in headless Chrome and return the fully rendered HTML.

    Selenium is used (rather than a plain HTTP fetch) so that content the
    site inserts via JavaScript is present in the returned source.

    Args:
        url: The page to fetch.

    Returns:
        The page's HTML source after JavaScript execution.
    """
    # Replace 'path_to_chromedriver' with the actual path to your
    # ChromeDriver executable.
    selenium_service = Service('path_to_chromedriver')
    chrome_options = Options()
    chrome_options.add_argument('--headless')  # Run Chrome without a window
    driver = webdriver.Chrome(service=selenium_service, options=chrome_options)
    try:
        driver.get(url)
        # Captured after navigation, so it reflects the post-JS DOM.
        return driver.page_source
    finally:
        # Bug fix: always release the browser process, even when
        # driver.get() raises — previously a failure leaked Chrome.
        driver.quit()
def extract_links(page_source):
    """Partition album-grid links into pagination links and image links.

    Args:
        page_source: Rendered HTML of an album listing page.

    Returns:
        A ``(pages, images)`` tuple of href strings: ``pages`` are links
        containing ``?page=``; ``images`` are the remaining links that
        contain ``/image/``.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    # Every anchor inside the album grid on this page.
    anchors = soup.select("body.is-main main.layout-dynamic div.container.container_width_wide div.album.album_category a[href]")
    hrefs = [anchor["href"] for anchor in anchors]
    # Pagination wins over image: a href carrying "?page=" is never
    # treated as an image link (mirrors the original if/elif precedence).
    pages = [href for href in hrefs if "?page=" in href]
    images = [href for href in hrefs if "?page=" not in href and "/image/" in href]
    return pages, images
def visit_image_and_download(url):
    """Open one wallpaper page, resolve its download link, and download it.

    The resolved href is appended to ``href.txt`` as an audit trail, then
    the actual file transfer is delegated to ``download_image``.

    Args:
        url: URL of a single wallpaper image page.
    """
    # Replace 'path_to_chromedriver' with the actual path to your
    # ChromeDriver executable.
    selenium_service = Service('path_to_chromedriver')
    chrome_options = Options()
    chrome_options.add_argument('--headless')  # Run Chrome without a window
    driver = webdriver.Chrome(service=selenium_service, options=chrome_options)
    try:
        driver.get(url)
        # Crude fixed wait for the page to load; an explicit WebDriverWait
        # on the download button would be more robust.
        time.sleep(2)
        download_button = driver.find_element(By.CSS_SELECTOR, "html body.is-main main.layout-dynamic div.container.container_width_wide div.wallpaper div.wallpaper__main div.grid div.grid__col.grid__col_xs_12.grid__col_md_3 div.wallpaper__right div.wallpaper__buttons a.btn.btn_block.wallpaper__download")
        href = download_button.get_attribute('href')
        # Log every resolved download link before attempting the transfer.
        with open('href.txt', 'a') as f:
            f.write('\n' + href)
        download_image(href)
    finally:
        # Bug fix: always release the browser process — previously a
        # failed element lookup or navigation leaked headless Chrome.
        driver.quit()
def download_image(image_url):
    """Download *image_url* into the current working directory.

    The filename is the last path segment of the URL (query string
    stripped). Failures are reported but do not raise, so one bad link
    does not abort the whole crawl.

    Args:
        image_url: Direct URL of the image file.
    """
    # Timeout added so a stalled server cannot hang the crawler forever.
    response = requests.get(image_url, timeout=30)
    if response.status_code == 200:
        # Last URL path segment; drop any ?query suffix so the saved
        # filename is clean (e.g. "wall.jpg?dl=1" -> "wall.jpg").
        filename = image_url.split("/")[-1].split("?")[0]
        with open(filename, "wb") as f:
            f.write(response.content)
        # Bug fix: the original printed a literal placeholder instead of
        # interpolating the filename.
        print(f"Downloaded: {filename}")
    else:
        # Best-effort: report the failure and continue crawling.
        print(f"Failed to download {image_url}: HTTP {response.status_code}")
def scrape_website(url):
    """Recursively crawl an album page.

    Downloads every image linked from *url*, then follows each pagination
    link that has not been visited yet.

    Args:
        url: Album page URL to crawl.
    """
    # Render the page (JavaScript included) and pull out its links.
    rendered_html = get_href_values(url)
    page_links, image_links = extract_links(rendered_html)

    # Download each image found on this page.
    for image_link in image_links:
        visit_image_and_download(image_link)

    # Recurse into unseen pagination links; the module-level Visited_pages
    # set guards against revisiting (and thus against cycles).
    for page_link in page_links:
        if page_link in Visited_pages:
            continue
        Visited_pages.add(page_link)
        scrape_website(page_link)
# All downloads land in ./images: download_image() writes to the current
# working directory, so we create the folder and move into it first.
os.makedirs("images", exist_ok=True)
os.chdir("images")

# Entry point: start crawling the 1920x1080 nature album.
start_url = "https://wallspic.com/album/nature/1920x1080"
scrape_website(start_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment