Last active
June 15, 2020 07:47
-
-
Save yogendratamang48/32dbbb821ea55b22e5293d6373e46480 to your computer and use it in GitHub Desktop.
Request helper and proxy support for Selenium. [Make sure your IP is on the whitelist of your proxy provider.]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
import random
import time
from urllib.parse import quote

from fake_useragent import UserAgent
from lxml import html
import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Text file holding a Python list literal of proxy "host:port" strings
# (parsed by get_random_proxy).
PROXIES_FILE = 'configs/proxies.txt'
# chromedriver executable and Chrome binary used by get_driver().
CHROME_DRIVER_PATH = '/usr/bin/chromedriver'
CHROME_PATH = '/usr/bin/google-chrome'
# FIREFOX_PATH = '/usr/bin/google-chrome'
# Proxy hosts; get_driver() picks one at random for Firefox proxy mode.
PROXIES_SEL = [
    region + '.proxymesh.com'
    for region in ('fr', 'us-wa', 'jp', 'au', 'de')
]
def get_random_proxy(proxies_file=None):
    '''
    Return one randomly chosen proxy as a proxies dict for ``requests``.

    The proxies file must contain a Python list literal of "host:port"
    strings. It is parsed with ``ast.literal_eval``, so only literals are
    accepted and no code from the file is executed.

    proxies_file: optional path to the proxies file; defaults to
        PROXIES_FILE when omitted.

    Returns a dict whose 'http' and 'https' keys both map to
    'http://<host:port>'.

    Raises ValueError when the file contains an empty list.
    '''
    path = PROXIES_FILE if proxies_file is None else proxies_file
    # Context manager so the file handle is closed even if parsing fails.
    with open(path) as proxies_fh:
        proxies = ast.literal_eval(proxies_fh.read())
    if not proxies:
        raise ValueError('no proxies found in ' + str(path))
    proxy_url = 'http://' + random.choice(proxies)
    # The same HTTP proxy endpoint is used for both schemes.
    return {'http': proxy_url, 'https': proxy_url}
def get_random_header():
    '''
    Build a headers dict whose only entry is a 'User-Agent' value taken
    from fake_useragent's ``UserAgent().chrome``.
    '''
    user_agent = UserAgent().chrome
    return {'User-Agent': user_agent}
def get_response(url, use_proxy=True, timeout=30):
    '''
    Fetch ``url`` with a random user agent and, optionally, a random proxy.

    url: address to request with HTTP GET.
    use_proxy: when true, route the request through a proxy chosen by
        get_random_proxy(); otherwise connect directly.
    timeout: value passed to ``requests.get(timeout=...)``. Without a
        timeout a dead proxy can block the caller indefinitely.

    Returns the ``requests`` response object.
    '''
    request_kwargs = {'headers': get_random_header(), 'timeout': timeout}
    if use_proxy:
        print("Browsing(Proxy): ", url)
        request_kwargs['proxies'] = get_random_proxy()
    else:
        print("Browsing: ", url)
    return requests.get(url, **request_kwargs)
def _render_in_headless_chrome(markup):
    '''
    Load ``markup`` into headless Chrome and return the resulting page
    source parsed with ``lxml.html.fromstring``.
    '''
    from selenium.webdriver.chrome.options import Options
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(options=options)
    try:
        # Percent-encode the markup: raw '#', '%' or whitespace inside a
        # data: URL would otherwise truncate or corrupt the document.
        driver.get("data:text/html;charset=utf-8," + quote(markup))
        # Fixed 2-second pause before the page source is read.
        time.sleep(2)
        return html.fromstring(driver.page_source.encode('utf-8'))
    finally:
        # quit() ends the whole browser session; close() would only close
        # the current window.
        driver.quit()


def get_chrome_page(url):
    '''
    Download ``url`` via get_response() and return the page as an lxml
    element after loading the downloaded HTML in headless Chrome.

    The HTML is fetched with ``requests`` and handed to Chrome as a
    data: URL, so Chrome itself never navigates to ``url``.
    '''
    # Fetch first so no browser is started when the download fails.
    resp = get_response(url)
    return _render_in_headless_chrome(resp.text)


def load_static_chrome(text):
    '''
    Load the HTML string ``text`` in headless Chrome and return the
    resulting page as an lxml element.
    '''
    return _render_in_headless_chrome(text)
def get_driver(browser="firefox", use_proxy=False):
    '''
    Start a headless Selenium driver and return it.

    browser: 'chrome' or 'firefox' (case and surrounding whitespace are
        ignored).
    use_proxy: only honoured for Firefox. When true, Firefox is configured
        to use a random host from PROXIES_SEL on port 31280 for HTTP and
        SSL, and start-up is attempted up to 5 times with a fresh random
        host each time. Chrome is always started without a proxy.

    Returns the driver, or None when ``browser`` is not recognised or when
    every Firefox proxy attempt raised WebDriverException.
    '''
    browser_name = browser.lower().strip()

    if browser_name == 'chrome':
        print("Chrome started")
        from selenium.webdriver.chrome.options import Options as ChromeOptions
        chrome_opts = ChromeOptions()
        chrome_opts.binary_location = CHROME_PATH
        chrome_opts.add_argument("--headless")
        chrome_opts.add_argument("--no-sandbox")
        return webdriver.Chrome(options=chrome_opts,
                                executable_path=CHROME_DRIVER_PATH)

    if browser_name != 'firefox':
        # Unknown browser names yield no driver.
        return None

    from selenium.webdriver.firefox.options import Options as FirefoxOptions

    if not use_proxy:
        print("Firefox started")
        firefox_opts = FirefoxOptions()
        # The -headless argument replaces the deprecated set_headless().
        firefox_opts.add_argument("-headless")
        return webdriver.Firefox(options=firefox_opts)

    proxy_port = 31280
    for attempt in range(5):
        firefox_opts = FirefoxOptions()
        firefox_opts.add_argument("-headless")
        profile = webdriver.FirefoxProfile()
        proxy_host = random.choice(PROXIES_SEL)
        print("Firefox started: (Proxy Mode): " + proxy_host)
        # network.proxy.type 1 selects manual proxy configuration.
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", proxy_host)
        profile.set_preference("network.proxy.http_port", proxy_port)
        profile.set_preference("network.proxy.ssl", proxy_host)
        profile.set_preference("network.proxy.ssl_port", proxy_port)
        try:
            return webdriver.Firefox(options=firefox_opts,
                                     firefox_profile=profile)
        except WebDriverException as e:
            # Report the failure, then retry with another random host.
            print("Proxy Error: ", e)
            print("Try: ", attempt)

    # All proxy attempts failed.
    return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment