Skip to content

Instantly share code, notes, and snippets.

@guruguruman
Last active January 24, 2021 14:33
Show Gist options
  • Save guruguruman/1e424ef0a7e8d9e0113a2c9c0b9dfa9e to your computer and use it in GitHub Desktop.
Save guruguruman/1e424ef0a7e8d9e0113a2c9c0b9dfa9e to your computer and use it in GitHub Desktop.
Rotate publicly listed proxies on each request in PHP.
<?php
/**
 * Provides publicly listed proxies and rotates them as the client requests them.
 *
 * The proxy list is downloaded lazily. Each proxy is handed out at most
 * `$limitUsageCount` times before the next one in the list takes over.
 */
class ScrapingProxyProvider
{
    // Proxies not handed out yet; each entry is array("ip" => ..., "port" => ...).
    private $proxyDatas = array();
    // Proxy currently handed out, or null when the next fetch must rotate.
    private $proxyData = null;
    // Maximum number of times a single proxy is returned before rotating.
    public $limitUsageCount = 5;
    // Number of times the current proxy has been returned so far.
    private $usedCount = 0;

    /**
     * Download the proxy list from 'https://proxy.l337.tech' (with big thanks).
     *
     * @return array List of array("ip" => string, "port" => string);
     *               empty when the download fails.
     */
    private function getRemoteProxies()
    {
        $content = file_get_contents("https://proxy.l337.tech/txt");
        if ($content === false) {
            // file_get_contents() signals failure with false; treat it as
            // "no proxies" instead of trying to parse a boolean.
            return array();
        }
        return $this->parseProxyList($content);
    }

    /**
     * Parse a newline-separated "host:port" listing.
     *
     * Lines that do not consist of exactly two non-empty, colon-separated
     * parts are skipped. Surrounding whitespace (including the "\r" of CRLF
     * line endings) is stripped from both parts.
     *
     * @param string $content Raw text of the proxy list.
     * @return array List of array("ip" => string, "port" => string).
     */
    private function parseProxyList($content)
    {
        $parsed = array();
        foreach (explode("\n", $content) as $line) {
            $parts = array_map('trim', explode(":", $line));
            if (count($parts) !== 2 || $parts[0] === "" || $parts[1] === "") {
                continue;
            }
            $parsed[] = array(
                "ip" => $parts[0],
                "port" => $parts[1],
            );
        }
        return $parsed;
    }

    /**
     * Discard the proxy currently in use, forcing the next call to
     * fetchProxyData() to rotate to another proxy.
     */
    public function discardCurrent()
    {
        $this->proxyData = null;
    }

    /**
     * Return the proxy to use for the next request, rotating when needed.
     *
     * Rotation happens when there is no current proxy (first call, or after
     * discardCurrent()) or when the current proxy has already been returned
     * `$limitUsageCount` times. The remote list is downloaded only at that
     * moment, and only if the local queue is empty.
     *
     * @return array|null array("ip" => string, "port" => string), or null
     *                    when no proxy could be obtained.
     */
    public function fetchProxyData()
    {
        $mustRotate = !$this->proxyData || $this->limitUsageCount <= $this->usedCount;
        if ($mustRotate) {
            // Refill lazily: while the current proxy is still usable there is
            // no reason to hit the network.
            if (count($this->proxyDatas) === 0) {
                $this->proxyDatas = $this->getRemoteProxies();
            }
            $this->usedCount = 0;
            // array_shift() yields null on an empty queue, which callers see
            // as "no proxy available".
            $this->proxyData = array_shift($this->proxyDatas);
        }
        $this->usedCount++;
        return $this->proxyData;
    }
}
/**
 * Usage example: send a number of requests through rotating proxies.
 * When an IP appears to be banned, call 'discardCurrent' to force a rotation.
 */
$proxyProvider = new ScrapingProxyProvider();
$proxyProvider->limitUsageCount = 5;
$url = "https://example.com";
$requestCount = 30;
// Public proxies are frequently dead or very slow; bound each attempt so a
// single bad proxy cannot stall the whole loop.
$connectTimeoutSeconds = 10;
$requestTimeoutSeconds = 30;
for ($i = 0; $i < $requestCount; $i++) {
    $proxyData = $proxyProvider->fetchProxyData();
    if (!is_array($proxyData)) {
        // The provider returns null when it has no proxy to offer; there is
        // nothing sensible to send the request through.
        print("No proxy available, stopping." . PHP_EOL);
        break;
    }
    $proxy = "{$proxyData["ip"]}:{$proxyData["port"]}";
    $ch = curl_init($url);
    curl_setopt_array($ch, array(
        CURLOPT_PROXY => $proxy,
        CURLOPT_HEADER => true,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_CONNECTTIMEOUT => $connectTimeoutSeconds,
        CURLOPT_TIMEOUT => $requestTimeoutSeconds,
    ));
    $content = curl_exec($ch);
    // CURLINFO_HTTP_CODE is 0 when no response was received at all.
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    // Report the outcome first so the optional note below lands on the same
    // line as the proxy it refers to.
    print("{$proxy} => ". $status);
    if ($status !== 200) {
        print(" Used proxy could be banned, discard current proxy to get next one.");
        $proxyProvider->discardCurrent();
    }
    print(PHP_EOL);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment