Skip to content

Instantly share code, notes, and snippets.

@Cguilliman
Last active January 20, 2020 13:06
Show Gist options
  • Save Cguilliman/f171e76939baa350c4dbc2d97140bf2d to your computer and use it in GitHub Desktop.
Save Cguilliman/f171e76939baa350c4dbc2d97140bf2d to your computer and use it in GitHub Desktop.
Proxy module
import requests
from bs4 import BeautifulSoup as bs
from apps.proxy.models import Proxy
# Page requested by ProxyGetter.remote_proxies() and parsed for proxy hosts.
PROXY_URL = "https://www.sslproxies.org/"
# URL requested through a candidate proxy by ProxyGetter.test_request().
TEST_URL = "https://www.google.com/"
class ProxyGetter:
    """Collect, test and persist proxy hosts.

    Hosts are ``"<first cell>:<second cell>"`` strings scraped from the table
    at ``PROXY_URL`` and stored as ``Proxy`` rows, whose ``is_valid`` flag
    records whether the host is still considered usable.
    """

    # Seconds to wait for any outgoing HTTP request before giving up.
    REQUEST_TIMEOUT = 10

    def remote_proxies(self):
        """Yield proxy hosts scraped from the ``PROXY_URL`` listing page.

        Only the 20 ``<tr>`` rows following the first (header) row are
        considered. Rows with fewer than two ``<td>`` cells are skipped.

        Yields:
            str: the text of the first two cells joined by ``":"``.
        """
        response = requests.get(PROXY_URL, timeout=self.REQUEST_TIMEOUT)
        soup = bs(response.content, "lxml")
        # ``select`` takes a CSS selector only; a second positional dict is
        # not an attribute filter, so it is intentionally not passed here.
        for row in soup.select('tr')[1:21]:
            cells = row.select('td')
            if len(cells) < 2:
                # Header/spacer rows carry no host data.
                continue
            yield f"{cells[0].text}:{cells[1].text}"

    def proxies(self):
        """Yield candidate proxy hosts, stored valid ones first.

        After the stored valid hosts are exhausted, freshly scraped hosts are
        yielded: unknown hosts are saved to the database first, hosts already
        stored as invalid are skipped, and hosts already yielded during this
        iteration are not repeated.

        Yields:
            str: proxy host string.
        """
        yielded = set()
        for proxy in Proxy.objects.valid():
            yielded.add(proxy.host)
            yield proxy.host

        for remote_host in self.remote_proxies():
            if remote_host in yielded:
                continue
            known = Proxy.objects.filter(host=remote_host).first()
            if known is None:
                known = Proxy.objects.create(host=remote_host)
            elif not known.is_valid:
                # Previously recorded as unusable; do not offer it again.
                continue
            yielded.add(known.host)
            yield known.host

    def test_request(self, host):
        """Return ``True`` if ``TEST_URL`` can be fetched through ``host``.

        Any response counts as success; the status code is not inspected.

        Args:
            host: proxy host string, interpolated into the proxy URLs.

        Returns:
            bool: ``False`` if the request raised, ``True`` otherwise.
        """
        try:
            requests.get(
                TEST_URL,
                timeout=self.REQUEST_TIMEOUT,
                # NOTE(review): many HTTP proxies expect an ``http://`` proxy
                # URL even for https traffic -- confirm the ``https://``
                # scheme below is intended.
                proxies={
                    'http': f"http://{host}",
                    'https': f"https://{host}",
                },
            )
        except Exception:
            # Best-effort probe: any failure means the proxy is unusable.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            return False
        return True

    def update_proxies(self):
        """Refresh the stored proxy list.

        1. Re-test every proxy currently stored as valid and mark the ones
           whose test request fails as invalid.
        2. Scrape the remote listing and store every host not yet in the
           database, with ``is_valid`` set from a test request.
        """
        for proxy in Proxy.objects.valid():
            if not self.test_request(proxy.host):
                proxy.is_valid = False
                proxy.save(update_fields=['is_valid'])

        for remote_host in self.remote_proxies():
            if not Proxy.objects.filter(host=remote_host).exists():
                Proxy.objects.create(
                    host=remote_host,
                    is_valid=self.test_request(remote_host),
                )

    def write_as_invalid(self, host):
        """Record ``host`` as invalid, creating its row if it is unknown.

        Args:
            host: proxy host string to mark as unusable.
        """
        proxy, created = Proxy.objects.get_or_create(
            host=host,
            defaults={'is_valid': False},
        )
        # A freshly created row is already invalid; only flip existing ones.
        if not created and proxy.is_valid:
            proxy.is_valid = False
            proxy.save(update_fields=['is_valid'])
# Module-level ProxyGetter instance, created when this module is imported.
proxy_getter = ProxyGetter()
================= Models =================
from django.db import models
from django.utils.translation import ugettext_lazy as _
class ProxyQuerySet(models.QuerySet):
    """QuerySet with a shortcut for selecting rows flagged as valid."""

    def valid(self):
        """Return the rows whose ``is_valid`` flag is ``True``."""
        usable = self.filter(is_valid=True)
        return usable
class Proxy(models.Model):
    """A proxy host together with its validity flag and creation time."""

    # Host string of the proxy (up to 255 characters).
    host = models.CharField(_('Host'), max_length=255)
    # Validity flag; new rows default to valid.
    is_valid = models.BooleanField(_('Is valid'), default=True)
    # Set automatically when the row is first created.
    created_at = models.DateTimeField(_("Created at"), auto_now_add=True)

    objects = ProxyQuerySet.as_manager()

    class Meta:
        verbose_name = _("Proxy")
        verbose_name_plural = _("Proxies")

    def __str__(self):
        """Use the host string as the human-readable representation."""
        return self.host
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment