Skip to content

Instantly share code, notes, and snippets.

@sudoxx2
Last active October 29, 2023 18:34
Show Gist options
  • Save sudoxx2/2ebbdb52373a6cf3913668aaa2280245 to your computer and use it in GitHub Desktop.
Save sudoxx2/2ebbdb52373a6cf3913668aaa2280245 to your computer and use it in GitHub Desktop.
automate_download
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os
# function to take care of downloading file
def enable_download_headless(browser, download_dir):
    """Allow a headless Chrome session to save downloads into ``download_dir``.

    Registers a ``send_command`` entry on the driver's command executor and
    then executes it with the DevTools command ``Page.setDownloadBehavior``
    (behavior ``allow``).

    Args:
        browser: Driver object exposing ``command_executor._commands`` and
            an ``execute(name, params)`` method.
        download_dir: Directory handed to Chrome as ``downloadPath``.
    """
    # Register the endpoint under the name that execute() is called with below.
    browser.command_executor._commands["send_command"] = (
        "POST",
        '/session/$sessionId/chromium/send_command',
    )
    params = {
        'cmd': 'Page.setDownloadBehavior',
        'params': {'behavior': 'allow', 'downloadPath': download_dir},
    }
    browser.execute("send_command", params)
# Instantiate a Chrome options object so the window size and headless preference can be set.
# Some of these Chrome options might be unnecessary; they come from a boilerplate.
# Change <path_to_download_default_directory> to wherever your default download folder is located.
chrome_options = Options()
chrome_options.add_argument("--headless")
# Chrome expects the window size as "width,height"; the "1920x1080" form is not parsed.
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--verbose')
chrome_options.add_experimental_option("prefs", {
    "download.default_directory": "<path_to_download_default_directory>",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing_for_trusted_sources_enabled": False,
    "safebrowsing.enabled": False,
})
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--disable-software-rasterizer')
# Initialize the driver object; change <path_to_chrome_driver> to where your chromedriver lives.
# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): newer Selenium 4 releases no longer accept `executable_path`; there, pass
# `service=Service("<path_to_chrome_driver>")` instead — confirm against your Selenium version.
driver = webdriver.Chrome(options=chrome_options, executable_path="<path_to_chrome_driver>")
# Change <path_to_place_downloaded_file> to the directory where the downloaded file should go.
download_dir = "<path_to_place_downloaded_file>"
# Set up downloading for the headless session.
enable_download_headless(driver, download_dir)
# Open the page Selenium will act on.
driver.get("https://www.thinkbroadband.com/download")
# Locate the download element on the page and click it to start the download.
# The generic find_element(by, value) form is used because the find_element_by_* helpers
# were removed in newer Selenium releases; "css selector" is the value of By.CSS_SELECTOR.
search_input = driver.find_element(
    "css selector",
    '#main-col > div > div > div:nth-child(8) > p:nth-child(1) > a > img',
)
search_input.click()
@sudoxx2
Copy link
Author

sudoxx2 commented Nov 11, 2019

@xaverrevax No problem. Hit me up if you need any clarification or find any bugs.

@sudoxx2
Copy link
Author

sudoxx2 commented Mar 11, 2020

@SnapDragon7410 It will be detected as a bot if the website you are automating has bot protection. There will always be ways around bot detection; they just require a bit more effort.

let me know if you need any help.

@riko714
Copy link

riko714 commented Jun 17, 2020

Thank you, very helpful
How do I do a "save file as"? I want to store the file under a different name for versioning purposes.

@CoolDevMan
Copy link

Hello @sudoxx2
I am using Windows OS 11, Python version is 3.11.4, and Chrome Driver version is 118.
I'm trying to download the csv file to a designated folder, but it doesn't work.
Please check my code below.
Is there a problem with the path?
Could you please help me solve it?
Thanks in advance.

# Service is used below but was never imported in this snippet.
from selenium.webdriver.chrome.service import Service

# Build the Chrome options once; every argument and pref below must reach the driver.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--headless')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-extensions')
options.add_argument('--verbose')
options.add_argument("--disable-notifications")
options.add_experimental_option(
    "excludeSwitches", ["enable-automation", "enable-logging"]
)
prefs = {
    "download.default_directory": "C:/Users/xxx/Downloads",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing_for_trusted_sources_enabled": False,
    "safebrowsing.enabled": False
}
options.add_experimental_option("prefs", prefs)
options.add_argument('--disable-gpu')
options.add_argument('--disable-software-rasterizer')

service = Service(executable_path='./chromedriver-win64/chromedriver.exe')
# Do NOT re-create `options` here: a fresh webdriver.ChromeOptions() would discard
# the headless flag and the download prefs configured above.
driver = webdriver.Chrome(service=service, options=options)

# NOTE(review): this directory differs from "download.default_directory" in prefs above —
# confirm which folder the file is expected to land in.
enable_download_headless(driver, "D:/02_work/csv")

driver.implicitly_wait(10)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment