Skip to content

Instantly share code, notes, and snippets.

@6aditya8
Created August 27, 2019 14:41
Show Gist options
  • Save 6aditya8/b6a3a7b76cc6b8f9d27732bfc5e0da92 to your computer and use it in GitHub Desktop.
Save 6aditya8/b6a3a7b76cc6b8f9d27732bfc5e0da92 to your computer and use it in GitHub Desktop.
Get a list of proxy IP Addresses (useful for IP Rotation during scraping)
from lxml.html import fromstring
import random
import requests
def get_proxies(url='https://free-proxy-list.net/', timeout=10):
    """
    Gather a list of proxies from a free-proxy-list.net style HTML table.

    Only table rows whose seventh cell contains the text "yes" are kept
    (presumably the site's "Https" column -- confirm against the live page
    layout, which may change over time).

    :param url: Page to scrape; defaults to https://free-proxy-list.net/
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot block the caller forever.
    :return: List of unique "ip:port" strings, in no particular order.
        Empty if no row matched.
    :raises requests.RequestException: on connection failure, timeout, or a
        non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the proxy table.
    response.raise_for_status()
    parser = fromstring(response.text)

    # A set de-duplicates proxies that appear in more than one row.
    proxies = set()
    # Look at no more than the first 1000 table rows.
    for row in parser.xpath('//tbody/tr')[:1000]:
        if not row.xpath('.//td[7][contains(text(),"yes")]'):
            continue
        # Grabbing IP and corresponding PORT
        ip = row.xpath('.//td[1]/text()')
        port = row.xpath('.//td[2]/text()')
        if not ip or not port:
            # Row has an empty IP or port cell; skip it rather than crash
            # with IndexError on the [0] lookup.
            continue
        proxies.add(":".join([ip[0], port[0]]))
    return list(proxies)
# Scrape the current proxy list once; each entry is an "ip:port" string.
proxy_ips = get_proxies()
if not proxy_ips:
    # random.choice() on an empty list fails with an opaque
    # "Cannot choose from an empty sequence"; say what actually went wrong.
    raise RuntimeError("get_proxies() returned no proxies to choose from")
# To randomly select an IP from the collected proxies
random_ip = random.choice(proxy_ips)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment