Skip to content

Instantly share code, notes, and snippets.

@mertcangokgoz
Created May 22, 2021 09:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mertcangokgoz/63ebb1e791c4247c35b33f3e1b441751 to your computer and use it in GitHub Desktop.
Save mertcangokgoz/63ebb1e791c4247c35b33f3e1b441751 to your computer and use it in GitHub Desktop.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from random_useragent import random_useragent
class RequestsHelper(object):
    """HTTP request helper built on a retrying ``requests`` session.

    Every request made through :meth:`get` and :meth:`post` carries a freshly
    generated desktop/Windows User-Agent. When the response looks like a
    Cloudflare anti-DDoS challenge, the request is repeated through
    ``cfscrape``. The response encoding is then forced to either
    ``Windows-1254`` or ``utf-8`` depending on the body.
    """

    # Class-level fallback so the request methods still work on an instance
    # created without running ``__init__``.
    timeout = 5

    def __init__(
        self,
        retries=5,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        timeout=5,
    ):
        """
        Create a session whose HTTP and HTTPS adapters retry failed requests

        :param int retries: Value used for the total, read and connect retry
            budgets
        :param float backoff_factor: Backoff factor handed to ``Retry``
        :param tuple status_forcelist: HTTP status codes handed to ``Retry``
            as ``status_forcelist``
        :param float timeout: Timeout in seconds applied to every request
            made by this helper
        """
        super(RequestsHelper, self).__init__()
        self.timeout = timeout

        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        # One adapter instance serves both schemes, so they share the policy.
        adapter = HTTPAdapter(max_retries=retry)
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        self.session = session

    def _generate_random_useragent(self):
        """
        Generate random user-agent string

        :return: User-agent string for a desktop Windows browser
        """
        randomizer = random_useragent.Randomize()
        return randomizer.random_agent("desktop", "windows")

    def _is_cloudflare_protected(self, r):
        """
        Check whether a response is a Cloudflare anti-DDoS challenge page

        :param object r: Response exposing ``status_code``, ``headers`` and
            ``content``
        :return: True only when the status is 503, the ``Server`` header
            starts with ``cloudflare`` and both challenge markers are present
            in the body
        :rtype: bool
        """
        server = r.headers.get("Server", "")
        return (
            r.status_code == 503
            and server.startswith("cloudflare")
            and b"jschl_vc" in r.content
            and b"jschl_answer" in r.content
        )

    def _create_cloudflare_scraper(self):
        """
        Build a ``cfscrape`` scraper used to retry a challenged request

        :return: Scraper object returned by ``cfscrape.create_scraper()``
        :raises ImportError: when ``cfscrape`` is not installed
        """
        # Imported lazily so cfscrape is only needed when a challenge is hit.
        import cfscrape  # pip install cfscrape

        return cfscrape.create_scraper()

    @staticmethod
    def _apply_encoding(r):
        """
        Force the response encoding based on the response body

        :param object r: Response exposing ``content`` and ``encoding``
        :return: The same response object, with ``encoding`` set to
            ``Windows-1254`` when the body mentions that charset and to
            ``utf-8`` otherwise
        """
        # The marker omits the first letter, so it matches both
        # "Windows-1254" and "windows-1254".
        if b"indows-1254" in r.content:
            r.encoding = "Windows-1254"
        else:
            r.encoding = "utf-8"
        return r

    def resolve_redirection(self, url):
        """
        Resolve redirections and return the final URL without downloading
        the page body (a HEAD request is used)

        :param str url: URL to resolve
        :return: Final URL after following redirects
        :raises requests.exceptions.RequestException: propagated unchanged
            when the request fails or times out
        """
        # A timeout keeps an unresponsive host from blocking the caller
        # forever; get() and post() already apply the same limit.
        r = self.session.head(url, allow_redirects=True, timeout=self.timeout)
        return r.url

    def get(self, url):
        """
        Fetch page content with an HTTP GET request

        :param str url: Endpoint or page URL
        :return: Response object with its encoding already set
        :raises requests.exceptions.RequestException: propagated unchanged
            when the request fails or times out
        """
        r = self.session.get(
            url,
            headers={"User-Agent": self._generate_random_useragent()},
            timeout=self.timeout,
        )
        if self._is_cloudflare_protected(r):
            scraper = self._create_cloudflare_scraper()
            r = scraper.get(r.url, timeout=self.timeout)
        return self._apply_encoding(r)

    def post(self, url, data):
        """
        Submit data with an HTTP POST request

        :param str url: Endpoint or page URL
        :param dict data: Post data dictionary
        :return: Response object with its encoding already set
        :raises requests.exceptions.RequestException: propagated unchanged
            when the request fails or times out
        """
        r = self.session.post(
            url,
            data=data,
            headers={"User-Agent": self._generate_random_useragent()},
            timeout=self.timeout,
        )
        if self._is_cloudflare_protected(r):
            scraper = self._create_cloudflare_scraper()
            r = scraper.post(r.url, data=data, timeout=self.timeout)
        return self._apply_encoding(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment