Skip to content

Instantly share code, notes, and snippets.

@NicolasBizzozzero
Last active October 23, 2019 15:36
Show Gist options
  • Save NicolasBizzozzero/e26fb23ce92daa642f0729648b51ce1e to your computer and use it in GitHub Desktop.
Save NicolasBizzozzero/e26fb23ce92daa642f0729648b51ce1e to your computer and use it in GitHub Desktop.
Example of using Python's requests library with round-robin (RR) cycling proxies.
import itertools
import requests
from lxml.html import fromstring
def main():
    """Fetch ``URL`` forever, rotating round-robin through batches of proxies.

    Each pass of the outer loop asks ``get_proxies`` for a fresh batch,
    drops every proxy already known to be unusable, and then cycles
    ``CYCLE_TIME`` times over the remaining ones, printing the body of every
    successful response. A proxy that fails with a connection error or a
    timeout is remembered and never tried again.

    This function never returns; interrupt the process to stop it.
    """
    import time  # local import: only needed for the retry back-off below

    URL = 'https://example.org'
    BATCH_SIZE_PROXIES = 10
    CYCLE_TIME = 3  # number of passes over a batch before fetching a new one
    TIMEOUT = 3  # seconds, applied to every proxied request
    RETRY_DELAY = 5  # seconds to wait when no usable proxy is available
    useless_proxies = set()  # set: only used for membership tests

    while True:
        try:
            candidates = get_proxies(n_proxies=BATCH_SIZE_PROXIES)
        except requests.exceptions.RequestException:
            # The proxy list itself could not be fetched; treat this as an
            # empty batch and retry after the back-off.
            candidates = set()

        live_proxies = [p for p in candidates if p not in useless_proxies]
        if not live_proxies:
            # itertools.cycle() over an empty pool would make next() raise
            # StopIteration, and retrying immediately would hammer the proxy
            # list website, so wait before asking for another batch.
            time.sleep(RETRY_DELAY)
            continue

        rounds = len(live_proxies) * CYCLE_TIME
        for proxy in itertools.islice(itertools.cycle(live_proxies), rounds):
            if proxy in useless_proxies:
                # Marked as dead earlier during this very batch.
                continue
            try:
                response = requests.get(
                    URL,
                    proxies={"http": proxy, "https": proxy},
                    timeout=TIMEOUT,
                )
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout):
                # ConnectionError covers ProxyError and SSLError; Timeout
                # covers both connect and read timeouts. Dead free proxies
                # commonly fail in any of these ways.
                useless_proxies.add(proxy)
            else:
                print(response.text)
def get_proxies(n_proxies: int = 10, timeout: float = 10) -> set:
    """Retrieve a maximum of `n_proxies` from the 'free-proxy-list.net' website.

    Only rows whose 7th column contains "yes" are kept.

    Args:
        n_proxies: Maximum number of distinct proxies to return. A value of
            zero or less yields an empty set.
        timeout: Seconds to wait for the proxy list page before giving up.

    Returns:
        A set of ``"ip:port"`` strings; it may hold fewer than `n_proxies`
        entries (or none) if the page lists fewer matching rows.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or the server answers with an HTTP error status.
    """
    URL = 'https://free-proxy-list.net/'
    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(URL, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    parser = fromstring(response.text)

    proxies = set()
    for row in parser.xpath('//tbody/tr'):
        # Count distinct proxies actually collected, so that duplicate rows
        # do not eat into the requested quota.
        if len(proxies) >= n_proxies:
            break
        if not row.xpath('.//td[7][contains(text(),"yes")]'):
            continue
        # Grabbing IP and corresponding port
        ip_cells = row.xpath('.//td[1]/text()')
        port_cells = row.xpath('.//td[2]/text()')
        if not ip_cells or not port_cells:
            # Row with an empty IP or port cell: skip it rather than crash
            # with an IndexError.
            continue
        proxies.add(":".join([ip_cells[0], port_cells[0]]))
    return proxies
# Run the demo only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment