Skip to content

Instantly share code, notes, and snippets.

@yogendratamang48
Last active June 15, 2020 07:47
Show Gist options
  • Save yogendratamang48/32dbbb821ea55b22e5293d6373e46480 to your computer and use it in GitHub Desktop.
Save yogendratamang48/32dbbb821ea55b22e5293d6373e46480 to your computer and use it in GitHub Desktop.
Request helper with proxy support for Selenium. [Make sure your IP is on your proxy provider's whitelist.]
import ast
import random
from fake_useragent import UserAgent
import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from lxml import html
import time
# File holding a Python list literal of 'host:port' proxies for plain
# `requests` traffic (parsed by get_random_proxy).
PROXIES_FILE = 'configs/proxies.txt'

# Locations of the chromedriver executable and the Chrome binary handed to
# Selenium by get_driver.
CHROME_DRIVER_PATH = '/usr/bin/chromedriver'
CHROME_PATH = '/usr/bin/google-chrome'
# FIREFOX_PATH = '/usr/bin/google-chrome'

# Proxy hosts that get_driver picks from at random when Firefox runs in proxy
# mode (the port is set inside get_driver).
PROXIES_SEL = [
    'fr.proxymesh.com',
    'us-wa.proxymesh.com',
    'jp.proxymesh.com',
    'au.proxymesh.com',
    'de.proxymesh.com',
]
def get_random_proxy():
    '''
    Return one randomly chosen proxy as a ``requests``-style proxies dict.

    PROXIES_FILE is read on every call and must contain a Python list
    literal of ``'host:port'`` strings, e.g. ``['1.2.3.4:8080']``.

    Returns:
        dict with the keys ``'http'`` and ``'https'``, both mapped to
        ``'http://<host:port>'`` for the selected proxy.

    Raises:
        OSError: if PROXIES_FILE cannot be opened.
        ValueError: if the file lists no proxies (``ast.literal_eval`` also
            raises ValueError/SyntaxError for content that is not a literal).
    '''
    # Context manager so the file handle is closed even if parsing fails.
    with open(PROXIES_FILE) as proxies_file:
        proxies = ast.literal_eval(proxies_file.read())
    if not proxies:
        raise ValueError('no proxies listed in ' + PROXIES_FILE)
    # The same plain-HTTP proxy endpoint carries both http and https traffic.
    proxy_url = 'http://' + random.choice(proxies)
    return {'http': proxy_url, 'https': proxy_url}
def get_random_header():
    '''
    Build a headers dict whose only entry is a ``User-Agent`` taken from
    ``fake_useragent``'s ``UserAgent().chrome``.
    '''
    return {'User-Agent': UserAgent().chrome}
def get_response(url, use_proxy=True, timeout=30):
    '''
    GET ``url`` with a random Chrome User-Agent, optionally through a random
    proxy from PROXIES_FILE.

    Args:
        url: address to fetch.
        use_proxy: when true (default), route the request through the proxy
            returned by get_random_proxy().
        timeout: seconds passed to ``requests.get`` as its ``timeout``
            argument. Without one, ``requests`` waits indefinitely on an
            unresponsive proxy or server; pass ``None`` to restore that.

    Returns:
        The ``requests.Response`` object; its status code is not checked.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            ``timeout`` is exceeded.
    '''
    request_kwargs = {'headers': get_random_header(), 'timeout': timeout}
    if use_proxy:
        print("Browsing(Proxy): ", url)
        request_kwargs['proxies'] = get_random_proxy()
    else:
        print("Browsing: ", url)
    return requests.get(url, **request_kwargs)
def get_chrome_page(url):
    '''
    Download ``url`` with get_response() and render the returned HTML in
    headless Chrome via load_static_chrome().

    The page is fetched *before* any browser is started, so a failed HTTP
    request does not leave a Chrome process behind.

    Returns:
        The lxml element tree that load_static_chrome() builds from the
        rendered page source.
    '''
    resp = get_response(url)
    return load_static_chrome(resp.text)
def load_static_chrome(text):
    '''
    Render an HTML string in headless Chrome and return the parsed result.

    The markup is loaded through a ``data:`` URL, the browser is given two
    seconds to settle, and the resulting page source is parsed with lxml.

    Args:
        text: HTML markup to render.

    Returns:
        lxml element tree built from the browser's ``page_source``.
    '''
    from urllib.parse import quote
    from selenium.webdriver.chrome.options import Options

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(chrome_options=options)
    try:
        # Percent-encode the markup: a raw '#' would start a URL fragment and
        # cut the document short, and a raw '%' would be read as an escape.
        driver.get("data:text/html;charset=utf-8," + quote(text))
        # Fixed pause before the page source is read back.
        time.sleep(2)
        return html.fromstring(driver.page_source.encode('utf-8'))
    finally:
        # quit() ends the whole WebDriver session, not just the current
        # window, and runs even when loading or parsing fails.
        driver.quit()
def get_driver(browser="firefox", use_proxy=False):
    '''
    Start a headless Selenium WebDriver.

    Args:
        browser: ``'chrome'`` or ``'firefox'``; case and surrounding
            whitespace are ignored.
        use_proxy: Firefox only. When true, HTTP and HTTPS traffic is routed
            through a random host from PROXIES_SEL on port 31280, and the
            start-up is retried up to five times (each with a freshly chosen
            host) if the browser fails to launch. Ignored for Chrome.

    Returns:
        The started driver, or ``None`` when all five proxied Firefox
        attempts fail.

    Raises:
        ValueError: if ``browser`` is neither ``'chrome'`` nor ``'firefox'``.
    '''
    browser_name = browser.lower().strip()

    if browser_name == 'chrome':
        from selenium.webdriver.chrome.options import Options
        print("Chrome started")
        options = Options()
        options.binary_location = CHROME_PATH
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        return webdriver.Chrome(chrome_options=options,
                                executable_path=CHROME_DRIVER_PATH)

    if browser_name != 'firefox':
        # Fail loudly instead of handing the caller an implicit None.
        raise ValueError("unsupported browser: %r" % (browser,))

    from selenium.webdriver.firefox.options import Options

    if not use_proxy:
        print("Firefox started")
        options = Options()
        # Same flag set_headless(True) adds, without relying on that helper.
        options.add_argument("-headless")
        return webdriver.Firefox(options=options)

    proxy_port = 31280
    for attempt in range(5):
        options = Options()
        options.add_argument("-headless")
        profile = webdriver.FirefoxProfile()
        proxy_host = random.choice(PROXIES_SEL)
        print("Firefox started: (Proxy Mode): " + proxy_host)
        # network.proxy.type 1 selects manual proxy configuration.
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", proxy_host)
        profile.set_preference("network.proxy.http_port", proxy_port)
        profile.set_preference("network.proxy.ssl", proxy_host)
        profile.set_preference("network.proxy.ssl_port", proxy_port)
        try:
            return webdriver.Firefox(options=options, firefox_profile=profile)
        except WebDriverException as exc:
            # Report why this attempt failed, then retry with another host.
            print("Proxy Error: ", exc)
            print("Try: ", attempt)
    # Every attempt failed; callers must check for None.
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment