Skip to content

Instantly share code, notes, and snippets.

@skulltech
Created March 6, 2016 15:24
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skulltech/fc097a04ddb3a102244d to your computer and use it in GitHub Desktop.
Save skulltech/fc097a04ddb3a102244d to your computer and use it in GitHub Desktop.
Reliable module for starting up Selenium Webdriver, with custom user-agent and custom profile
"""
Standard and reliable module for starting up Selenium Webdriver, with custom user-agent and custom profiles.
"""
import os
import subprocess
import sys
import urllib
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
def find_file(name, path):
    """Search a directory tree for a file and return its full path.

    Walks ``path`` with ``os.walk`` and returns the joined path of the first
    visited directory whose file list contains ``name``.

    Args:
        name (str): Exact file name to look for.
        path (str): Root directory of the search.

    Returns:
        str or None: Path of the first match, or None when no directory
        under ``path`` contains a file called ``name``.
    """
    # Lazy generator: the walk stops as soon as the first match is produced.
    matches = (
        os.path.join(folder, name)
        for folder, _subdirs, filenames in os.walk(path)
        if name in filenames
    )
    return next(matches, None)
def find_selenium_server():
    """Returns the path of the 'selenium-server-standalone-x.x.x.jar' file.

    The current working directory is searched recursively with ``os.walk``.
    A file matches when its name starts with 'selenium-server-standalone-'
    and ends with '.jar', so stray downloads or archives that merely share
    the prefix are not handed to ``java -jar``.

    Returns:
        str or None: Path of the first matching jar, or None if there is none.
    """
    prefix = 'selenium-server-standalone-'
    for root, _dirs, files in os.walk(os.getcwd()):
        for name in files:
            if name.startswith(prefix) and name.endswith('.jar'):
                return os.path.join(root, name)
    return None
def find_binary_file(name):
    """Locate an executable somewhere under the current working directory.

    On platforms whose ``sys.platform`` starts with 'win' the file searched
    for is ``name + '.exe'``; on 'darwin' and 'linux' it is ``name`` itself.
    On any other platform no search is performed.

    Args:
        name (str): Base name of the binary, without extension.

    Returns:
        str or None: Path found by ``find_file``, or None (after printing a
        message) when nothing was found.
    """
    platform = sys.platform
    if platform.startswith('win'):
        located = find_file(name=name + '.exe', path=os.getcwd())
    elif platform.startswith(('darwin', 'linux')):
        located = find_file(name=name, path=os.getcwd())
    else:
        located = None

    if not located:
        print('{name} not found in the working directory.'.format(name=name))
        return None
    return located
def start_selenium_server():
    """Starts the Java Standalone Selenium Server.

    Looks up the server jar with ``find_selenium_server()`` and launches
    ``java -jar <jar>`` as a child process without waiting for it to exit.
    If no jar is found, a message is printed and nothing is started.

    Returns:
        None
    """
    seleniumserver_path = find_selenium_server()
    if not seleniumserver_path:
        print('The file "selenium-server-standalone-x.x.x.jar" not found.')
        return
    cmd = ['java', '-jar', seleniumserver_path]
    # CREATE_NEW_CONSOLE is only defined on Windows; elsewhere fall back to
    # 0, which is Popen's default and the only value accepted on POSIX.
    creationflags = getattr(subprocess, 'CREATE_NEW_CONSOLE', 0)
    subprocess.Popen(cmd, creationflags=creationflags)
def _wait_for_selenium_server(status_url, timeout=60.0, poll_interval=1.0):
    """Block until the Selenium server answers at status_url, starting it once if needed."""
    # Imported here so the submodules are guaranteed to be loaded; a bare
    # ``import urllib`` does not by itself import ``request`` or ``error``.
    import time
    import urllib.error
    import urllib.request

    deadline = time.monotonic() + timeout
    launched = False
    while True:
        try:
            # The context manager closes the HTTP response once probed.
            with urllib.request.urlopen(status_url):
                return
        except (urllib.error.URLError, ConnectionError):
            # Launch the server only once; later failures just mean it is
            # still starting (or that the jar was not found at all).
            if not launched:
                start_selenium_server()
                launched = True
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    'Selenium server did not respond at {url} within {timeout} seconds.'.format(
                        url=status_url, timeout=timeout))
            time.sleep(poll_interval)


def start_webdriver(driver_name, user_agent=None, profile_path=None):
    """Starts and returns a Selenium Webdriver.

    Args:
        driver_name (str): Name of the webdriver to be started, matched
            case-insensitively [Chrome, Firefox, PhantomJs, HTMLUnit].
        user_agent (str): The user_agent string the webdriver should use.
            Ignored for HTMLUnit.
        profile_path (str): The path of the browser profile [only for Firefox and Chrome].

    Returns:
        selenium.webdriver: A Selenium Webdriver according to the Args, or
        None when ``driver_name`` is not one of the supported names.

    Raises:
        TimeoutError: For HTMLUnit, when the standalone Selenium server does
            not become reachable on localhost:4444 in time.
    """
    # NOTE(review): the keyword names used below (chrome_options,
    # desired_capabilities) are kept from the original code -- confirm they
    # match the Selenium version actually installed.
    driver_name = driver_name.lower()
    driver = None

    if driver_name == 'htmlunit':
        _wait_for_selenium_server('http://localhost:4444/wd/hub/status')
        # No user-agent override is applied for HTMLUnit.
        # Copy so the shared class-level capabilities dict is never mutated.
        dcap = DesiredCapabilities.HTMLUNITWITHJS.copy()
        driver = webdriver.Remote(command_executor="http://localhost:4444/wd/hub",
                                  desired_capabilities=dcap)

    elif driver_name == 'firefox':
        if profile_path:
            fp = webdriver.FirefoxProfile(profile_path)
        else:
            fp = webdriver.FirefoxProfile()
        if user_agent:
            fp.set_preference('general.useragent.override', user_agent)
        driver = webdriver.Firefox(fp)

    elif driver_name == 'chrome':
        opt = webdriver.chrome.options.Options()
        if user_agent:
            opt.add_argument('user-agent={user_agent}'.format(user_agent=user_agent))
        if profile_path:
            opt.add_argument('user-data-dir={profile_path}'.format(profile_path=profile_path))
        chromedriver_path = find_binary_file('chromedriver')
        if chromedriver_path:
            driver = webdriver.Chrome(chromedriver_path, chrome_options=opt)
        else:
            # Not found under the working directory: do not pass None as the
            # executable path; let Selenium use its own default lookup.
            driver = webdriver.Chrome(chrome_options=opt)

    elif driver_name == 'phantomjs':
        # Copy before editing: assigning into the class-level dict would leak
        # this user agent into every later PhantomJS session.
        dcap = DesiredCapabilities.PHANTOMJS.copy()
        if user_agent:
            dcap["phantomjs.page.settings.userAgent"] = user_agent
        phantomjs_path = find_binary_file('phantomjs')
        if phantomjs_path:
            driver = webdriver.PhantomJS(phantomjs_path, desired_capabilities=dcap)
        else:
            # Same fallback as for Chrome when the binary was not located.
            driver = webdriver.PhantomJS(desired_capabilities=dcap)

    return driver
@skulltech
Copy link
Author

I've moved this to a GitHub repo ( https://github.com/SkullTech/webdriver-start ) and I plan to keep updating it there. If you've been following this gist, please note that all further development will happen in that GitHub repo only, not here.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment