Created
March 26, 2018 06:55
-
-
Save yhojann-cl/8e147473622f30fddd52c7f1cd85806b to your computer and use it in GitHub Desktop.
Detecta y banea direcciones IP que realicen solicitudes HTTP continuas sin solicitar archivos estáticos como hojas de estilo (típico de un robot)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// clear-spam.php
/**
 * Command-line tool that scans a web-server access log and bans, through an
 * .htaccess file, every IPv4 address that issues more than four consecutive
 * requests without asking for a static asset (css, js, images, ...).
 *
 * All the work is done by the constructor: it copies the log to a temporary
 * file, loads the addresses that are already denied in the .htaccess file,
 * processes the copy line by line and finally removes the copy.
 *
 * Bans are written inside a block delimited by "# <CSpamBan>" and
 * "# </CSpamBan>"; the "# NEW_HERE" line marks where new entries are inserted.
 */
class ClearSpam
{
    /** @var array Command-line arguments as received by the constructor. */
    private $argv;

    /** @var string Path of the temporary working copy of the access log. */
    private $log_path;

    /** @var string Directory in which the working copy is created. */
    private $tmp_path;

    /** @var string Path of the .htaccess file that receives the bans. */
    private $htaccess_path;

    /** @var array Map ip => number of consecutive non-static requests seen. */
    private $addrs = array();

    /** @var array Set of banned addresses, stored as keys (ip => true). */
    private $bans = array();

    /** @var array Cache ip => reverse-DNS name, so each address is resolved once. */
    private $hosts = array();

    /**
     * Runs the whole scan.
     *
     * @param array $argv Script arguments: [0] script name, [1] access-log
     *                    path, [2] .htaccess path. Anything shorter prints the
     *                    usage line and does nothing else.
     */
    public function __construct($argv)
    {
        $this->log('ClearSpamBan 1.2, por WHK');

        // A missing or non-array $argv is treated as "no arguments".
        $this->argv = is_array($argv) ? $argv : array();

        if(count($this->argv) < 3)
        {
            $script = isset($this->argv[0]) ? $this->argv[0] : 'clear-spam.php';
            $this->log('Uso: '.$script.' [log path] [htaccess path]');
            return;
        }

        $this->tmp_path      = is_dir('/tmp/') ? '/tmp/' : sys_get_temp_dir();
        $this->htaccess_path = trim($this->argv[2]);
        $this->addrs         = array();
        $this->bans          = array();

        if(!file_exists($this->argv[1]))
        {
            $this->log('El archivo log no existe o no es accesible.');
            return;
        }

        if(!file_exists($this->htaccess_path))
        {
            $this->log('El archivo htaccess no existe o no es accesible.');
            return;
        }

        // Work on a private copy so the web server can keep appending to the
        // real log while it is being read. tempnam() yields a unique name, so
        // a log that itself lives in the temp directory is never overwritten
        // or deleted, and the path cannot be pre-created by another user.
        $this->log_path = tempnam($this->tmp_path, 'cspam_');
        if($this->log_path === false)
        {
            $this->log('Imposible copiar el log a procesar.');
            return;
        }

        $this->log('Clonando archivo log ...');
        $this->log($this->argv[1].' -> '.$this->log_path);

        if(!@copy($this->argv[1], $this->log_path))
        {
            // Do not leave the empty placeholder created by tempnam() behind.
            @unlink($this->log_path);
            $this->log('Imposible copiar el log a procesar.');
            return;
        }

        // Load the addresses that are already denied.
        $this->loadCurrentBans();

        // Look for new addresses to ban.
        $this->checkBotByNonStaticRequest();

        $this->log('Eliminando log temporal ...');
        unlink($this->log_path);
        $this->log('Finalizado!');
    }

    /**
     * Adds a "deny from" entry for the address to the .htaccess file, unless
     * the address is already known to be banned.
     *
     * @param string $ip Address to ban.
     */
    private function ban($ip)
    {
        // Already banned?
        if(isset($this->bans[$ip]))
        {
            return;
        }

        // Recorded up front so a failed write is not retried on every
        // following log line of the same address.
        $this->bans[$ip] = true;
        $this->log('Baneando dirección IP '.$ip.' ('.$this->resolveHost($ip).') ...');

        $buffer = file_get_contents($this->htaccess_path);
        if($buffer === false)
        {
            // Writing now would replace the whole file with just the ban block.
            $this->log('El archivo htaccess no existe o no es accesible.');
            return;
        }

        if(strpos($buffer, 'CSpamBan') === false)
        {
            // First ban ever: append the whole block.
            $buffer .= "\n\n".implode("\n", array(
                '# <CSpamBan>',
                ' order allow,deny',
                ' deny from '.$ip,
                ' # NEW_HERE',
                ' allow from all',
                '# </CSpamBan>'
            ));
        }
        else
        {
            // Block already present: insert the entry just before the marker.
            $buffer = str_replace('# NEW_HERE', 'deny from '.$ip."\n # NEW_HERE", $buffer);
        }

        if(file_put_contents($this->htaccess_path, $buffer, LOCK_EX) === false)
        {
            $this->log('Imposible escribir el archivo htaccess.');
        }
    }

    /**
     * Reads the .htaccess file and records every address listed in a
     * "deny from" line between the "<CSpamBan>" and "</CSpamBan>" markers.
     */
    private function loadCurrentBans()
    {
        $this->log('Cargando direcciones IP previamente baneadas ...');

        $handle = fopen($this->htaccess_path, 'r');
        if($handle)
        {
            $in_block = false;

            while(($line = fgets($handle)) !== false)
            {
                // The markers must be spelled exactly as ban() writes them.
                if(strpos($line, '<CSpamBan>') !== false)
                {
                    $in_block = true;
                    continue;
                }

                if(strpos($line, '</CSpamBan>') !== false)
                {
                    $in_block = false;
                    continue;
                }

                // Keep only the address, not the whole directive, so that the
                // lookups done with the bare IP actually match.
                if($in_block && preg_match('/^\s*deny\s+from\s+(\S+)/i', $line, $found))
                {
                    $this->bans[$found[1]] = true;
                }
            }

            fclose($handle);
        }

        $this->log('Se han cargado '.count($this->bans).' direcciones IP baneadas.');
    }

    /**
     * Walks the working copy of the log and bans every address whose counter
     * of consecutive non-static requests goes above four. A request for a
     * static asset resets the counter of its address.
     */
    private function checkBotByNonStaticRequest()
    {
        $this->log('Buscando robots: Sin solicitudes de archivos estáticos ...');
        $this->log('Procesando log ...');

        $handle = fopen($this->log_path, 'r');
        if(!$handle)
        {
            $this->log('Imposible leer el log.');
            return;
        }

        while(($line = fgets($handle)) !== false)
        {
            // $matches[1] = client address, $matches[2] = requested path.
            if(!preg_match('/(\\d+\\.\\d+\\.\\d+\\.\\d+)\\s+.*?"\\w+\\s+(.*?)\\s/is', trim($line), $matches))
            {
                continue;
            }

            $ip = $matches[1];

            // The pattern also accepts things like 999.1.1.1; such a value must
            // never reach the .htaccess file nor the DNS resolver.
            if(filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) === false)
            {
                continue;
            }

            // Cheap check first: banned addresses need no DNS lookup at all.
            if(isset($this->bans[$ip]) || $this->isAllowedCrawler($ip))
            {
                continue;
            }

            if(!isset($this->addrs[$ip]))
            {
                $this->addrs[$ip] = 0;
            }

            if(preg_match('/\\.(js|css|woff|txt|xml|jpg|jpeg|png|gif|ico)/is', strtolower($matches[2])))
            {
                // Asked for a static file: probably a real browser.
                $this->addrs[$ip] = 0;
                continue;
            }

            // One more request for a non-static resource: suspicious.
            $this->addrs[$ip]++;
            if($this->addrs[$ip] > 4)
            {
                $this->ban($ip);
            }
        }

        fclose($handle);
    }

    /**
     * Tells whether the reverse-DNS name of the address belongs to one of the
     * search engines / services that must never be banned.
     *
     * NOTE(review): only the PTR record is checked, and that record is
     * controlled by whoever owns the address; a forward-confirming lookup
     * would be needed to make this allow-list hard to spoof.
     *
     * @param string $ip Valid IPv4 address.
     * @return bool True when the last two labels of the host name are in the
     *              allow-list.
     */
    private function isAllowedCrawler($ip)
    {
        $hostname = $this->resolveHost($ip);

        // Reduce "crawl-1-2-3-4.googlebot.com" to "googlebot.com".
        if(strpos($hostname, '.') !== false)
        {
            $labels   = explode('.', $hostname);
            $total    = count($labels);
            $hostname = $labels[$total - 2].'.'.$labels[$total - 1];
        }

        return in_array(strtolower($hostname), array(
            'google.com', 'googlebot.com', # Google
            'msn.com', 'microsoft.com',    # Bing
            'yahoo.net', 'yahoo.com',      # Yahoo
            'ivegotafang.com',             # DuckDuck
            'facebook.com'                 # Facebook
        ), true);
    }

    /**
     * Reverse-resolves an address, remembering the answer so that each
     * address costs at most one DNS query per run.
     *
     * @param string $ip Valid IPv4 address.
     * @return string Host name, or the address itself when it does not resolve.
     */
    private function resolveHost($ip)
    {
        if(!isset($this->hosts[$ip]))
        {
            $hostname = gethostbyaddr($ip);

            // gethostbyaddr() returns false on malformed input.
            $this->hosts[$ip] = (is_string($hostname) && $hostname !== '') ? $hostname : $ip;
        }

        return $this->hosts[$ip];
    }

    /**
     * Prints a message followed by a line break.
     *
     * @param string $str Message to print.
     */
    private function log($str)
    {
        echo $str."\n";
    }
}
// Entry point. $argv only exists when PHP registers the command-line
// arguments (register_argc_argv); when it is missing an empty list is passed
// so the class prints its usage line instead of failing on an undefined variable.
$clearSpam = new ClearSpam((isset($argv) && is_array($argv)) ? $argv : array());
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment