# NicolasBizzozzero/request_proxies.py

## request_proxies.py
import itertools
import time

import requests
from lxml.html import fromstring


def main():
    """Fetch ``URL`` forever through a rotating pool of free proxies.

    Each pass of the outer loop downloads a fresh batch of at most
    ``BATCH_SIZE_PROXIES`` proxies with ``get_proxies`` and makes
    ``BATCH_SIZE_PROXIES * CYCLE_TIME`` attempts while cycling over that
    batch, printing the body of every response received. A proxy whose
    request fails with a connection error or a timeout is remembered for the
    lifetime of the process and skipped from then on.

    The function never returns; interrupt the process to stop it.
    """
    URL = 'https://example.org'
    BATCH_SIZE_PROXIES = 10
    CYCLE_TIME = 3
    TIMEOUT = 3  # seconds, passed to every proxied request
    RETRY_DELAY = 5  # seconds to wait before re-fetching when no proxy is usable

    # Only used for membership tests, so a set is the right container.
    useless_proxies = set()
    while True:
        candidates = [
            proxy
            for proxy in get_proxies(n_proxies=BATCH_SIZE_PROXIES)
            if proxy not in useless_proxies
        ]
        if not candidates:
            # Cycling over an empty batch would make next() raise
            # StopIteration, and re-fetching immediately would hit the proxy
            # list site in a tight loop; back off before trying again.
            time.sleep(RETRY_DELAY)
            continue

        proxy_pool = itertools.cycle(candidates)
        for _ in range(BATCH_SIZE_PROXIES * CYCLE_TIME):
            proxy = next(proxy_pool)

            # A proxy can be blacklisted part-way through the current batch.
            if proxy in useless_proxies:
                continue

            try:
                response = requests.get(URL, proxies={"http": proxy, "https": proxy}, timeout=TIMEOUT)
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                # ProxyError is a ConnectionError subclass. Timeouts and
                # dropped connections are caught as well so that a single
                # bad proxy cannot terminate the endless loop.
                useless_proxies.add(proxy)
            else:
                print(response.text)


def get_proxies(n_proxies: int = 10, timeout: float = 10):
    """ Retrieve a maximum of `n_proxies` from the 'free-proxy-list.net' website.

    The page is downloaded and every ``<tbody>`` table row is inspected in
    document order. A row is kept only when its 7th cell contains the text
    "yes" (NOTE(review): presumably the site's HTTPS-support column --
    confirm against the live page layout). For each kept row the text of the
    1st and 2nd cells is joined as ``"ip:port"``.

    :param n_proxies: upper bound on the number of distinct proxies returned;
        a value <= 0 yields an empty set.
    :param timeout: seconds to wait for the proxy list page before
        ``requests`` raises a timeout error, instead of blocking forever.
    :return: a set of ``"ip:port"`` strings, possibly smaller than
        ``n_proxies`` when the page lists fewer matching rows.
    :raises requests.exceptions.RequestException: when the page cannot be
        downloaded.
    """
    URL = 'https://free-proxy-list.net/'

    parser = fromstring(requests.get(URL, timeout=timeout).text)
    proxies = set()
    for row in parser.xpath('//tbody/tr'):
        # Count what was actually collected so duplicate rows do not use up
        # the quota.
        if len(proxies) >= n_proxies:
            break

        if not row.xpath('.//td[7][contains(text(),"yes")]'):
            continue

        # Grabbing IP and corresponding port; skip rows where either cell
        # has no text rather than failing on the [0] lookup.
        ip_text = row.xpath('.//td[1]/text()')
        port_text = row.xpath('.//td[2]/text()')
        if not ip_text or not port_text:
            continue

        proxies.add(":".join([ip_text[0], port_text[0]]))
    return proxies


# Run the proxy rotation loop only when executed as a script, not on import.
if __name__ == '__main__':
    main()