Created
January 29, 2015 17:55
-
-
Save dlundgren/cd0d3737fce8c83f86ee to your computer and use it in GitHub Desktop.
Converts a Blocklist.de text file from a plain list of IPs into a list with the CIDR ranges on top and the remaining individual IPs at the bottom.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Class BlocklistDe
 *
 * Parses a Blocklist.de text file (one IPv4 address per line) and collapses
 * groups of hosts into CIDR notation where a network is completely covered.
 * Iterating the object yields the CIDR entries first (widest network first),
 * followed by the hosts that could not be merged into any network.
 *
 * @author David Lundgren
 * @license MIT
 */
class BlocklistDe
    implements Iterator
{
    /*
     * The CLASS_* values double as the zero-based index of the octet that
     * varies inside that class (CLASS_B => third octet, CLASS_C => fourth).
     */
    const CLASS_A = 1;
    const CLASS_B = 2;
    const CLASS_C = 3;

    /**
     * @var array List of ips and cidr's
     */
    protected $list = [];

    /**
     * @var array List of netmasks for each class, keyed by prefix length
     */
    private $netmasks = [
        self::CLASS_C => [
            25 => '255.255.255.128',
            26 => '255.255.255.192',
            27 => '255.255.255.224',
            28 => '255.255.255.240',
            29 => '255.255.255.248',
            30 => '255.255.255.252',
            31 => '255.255.255.254',
        ],
        self::CLASS_B => [
            17 => '255.255.128.0',
            18 => '255.255.192.0',
            19 => '255.255.224.0',
            20 => '255.255.240.0',
            21 => '255.255.248.0',
            22 => '255.255.252.0',
            23 => '255.255.254.0',
        ]
    ];

    /**
     * @var int Current iterator position inside $list
     */
    private $pos = 0;

    /**
     * @param string|null $file Optional path of a list to load immediately
     */
    public function __construct($file = null)
    {
        if ($file) {
            $this->loadFromFile($file);
        }
    }

    /** Iterator **/

    // The attribute below is parsed as a plain comment before PHP 8; on 8.1+
    // it silences the "return type" deprecation without raising the minimum
    // PHP version of this file.
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->list[$this->pos];
    }

    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->pos;
    }

    #[\ReturnTypeWillChange]
    public function next()
    {
        ++$this->pos;
    }

    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->pos = 0;
    }

    #[\ReturnTypeWillChange]
    public function valid()
    {
        return isset($this->list[$this->pos]);
    }

    /** custom **/

    /**
     * Pads a partial dotted address ("10.1" or "10.1.2") with ".0" octets so
     * that it can be handed to ip2long(), which only accepts four octets.
     *
     * @param string $address
     * @return string
     */
    private function toDottedQuad($address)
    {
        $missing = 3 - substr_count($address, '.');

        return $missing > 0 ? $address . str_repeat('.0', $missing) : $address;
    }

    /**
     * Reduces the per-network counters of one prefix length to the list of
     * networks that are completely covered, in "prefix.start/cidr" form.
     *
     * @param string     $prefix       Leading octets shared by all candidates
     * @param array|null $nets         Counter per network start octet
     * @param int        $cidr         Prefix length being evaluated
     * @param bool       $netsAreHosts True when the counters count hosts,
     *                                 false when they count /24 networks
     * @return array
     */
    private function reduceCidr($prefix, $nets, $cidr, $netsAreHosts)
    {
        $bits = 8 - ($cidr % 8);
        // Number of consecutive values of the varying octet that one network
        // of this prefix length spans; networks start on multiples of it.
        $blockSize = 1 << $bits;
        $required  = $netsAreHosts ? pow(2, 32 - $cidr) : $blockSize;
        $reduced   = [];

        // Step by the block size (not by the number of blocks) so that every
        // aligned network start, including .254 for a /31, is inspected.
        for ($start = 0; $start < 256; $start += $blockSize) {
            if (isset($nets[$start]) && ($nets[$start] == $required)) {
                $reduced[] = "$prefix.$start/$cidr";
            }
        }

        return $reduced;
    }

    /**
     * Returns whether or not the ip is in the given network
     *
     * Partial addresses (fewer than four octets) are padded with zero octets
     * first; anything that still is not a valid IPv4 address never matches.
     *
     * @param string     $ip
     * @param string     $network
     * @param int|string $mask    Prefix length of the network
     * @return bool
     */
    private function inNetwork($ip, $network, $mask)
    {
        $ipLong      = ip2long($this->toDottedQuad($ip));
        $networkLong = ip2long($this->toDottedQuad($network));
        if ($ipLong === false || $networkLong === false) {
            return false;
        }

        return ($ipLong & ~((1 << (32 - $mask)) - 1)) === $networkLong;
    }

    /**
     * Process the ips into appropriate cidr bin if available
     *
     * The result maps prefix length => network start octet => number of
     * entries from $ips that fall into that network.
     *
     * @param int      $class     One of the CLASS_* constants
     * @param array    $ips       List of the ips to process (sorted in place)
     * @param \Closure $formatter How to format the ip
     * @return array
     */
    private function processIps($class, &$ips, \Closure $formatter)
    {
        if (empty($ips)) {
            return [];
        }

        natsort($ips);
        $bin = [];
        foreach ($ips as $host) {
            $ip = ip2long($formatter($host));
            foreach (array_keys($this->netmasks[$class]) as $cidr) {
                $network = explode('.', long2ip($ip & ~((1 << (32 - $cidr)) - 1)));
                $start   = $network[$class];
                $bin[$cidr][$start] = isset($bin[$cidr][$start]) ? $bin[$cidr][$start] + 1 : 1;
            }
        }

        return $bin;
    }

    /**
     * Reduce the CIDR bin into the IP/CIDR notation
     *
     * Prefix lengths are visited from widest to narrowest, so a network that
     * is already covered by a wider one is not reported again.
     *
     * @param int    $class    One of the CLASS_* constants
     * @param string $prefix   Prefix for the super net
     * @param array  $bin      The cidr bin processed from processIps
     * @param bool   $useHosts Whether or not the CIDR should use host or subnet counting
     * @return array Map of network address => prefix length
     */
    private function reduceNetmasks($class, $prefix, &$bin, $useHosts)
    {
        $nets = [];
        foreach (array_keys($this->netmasks[$class]) as $cidr) {
            $counts = isset($bin[$cidr]) ? $bin[$cidr] : [];
            foreach ($this->reduceCidr($prefix, $counts, $cidr, $useHosts) as $candidate) {
                list($ip, $mask) = explode('/', $candidate);
                $covered = false;
                foreach ($nets as $n => $m) {
                    if ($this->inNetwork($ip, $n, $m)) {
                        $covered = true;
                        break;
                    }
                }
                if (!$covered) {
                    $nets[$ip] = $mask;
                }
            }
        }

        return $nets;
    }

    /**
     * Records the networks and removes the IPs they cover
     *
     * @param array    $nets      List of networks from reduceNetmasks
     * @param array    $ipnets    List of current networks, keyed by prefix length
     * @param array    $ips       List of IPs; covered entries are removed
     * @param \Closure $formatter how to format the ip
     */
    private function removeNetsFromIps($nets, &$ipnets, &$ips, \Closure $formatter)
    {
        foreach ($nets as $n => $mask) {
            $ip = $formatter($n);
            $ipnets[$mask][] = $ip;
            foreach (array_keys($ips) as $k) {
                if ($this->inNetwork($formatter($ips[$k]), $ip, $mask)) {
                    unset($ips[$k]);
                }
            }
        }
    }

    /**
     * Loads the file into the list
     *
     * Blank lines, lines starting with "#", IPv6 addresses and anything that
     * is not a valid dotted IPv4 address are skipped. Duplicate addresses are
     * counted once, otherwise they would make a network look fully covered.
     *
     * @param string $file
     * @throws \InvalidArgumentException when the file is missing or unreadable
     */
    public function loadFromFile($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            throw new \InvalidArgumentException("$file does not exist or is not readable.");
        }

        // Pre-seed the class C bucket so an input without IPv4 entries does
        // not hand an undefined index to ksort()/foreach.
        $nets   = [self::CLASS_C => []];
        $ips    = [];
        $ipnets = [];

        $f = new SplFileObject($file);
        while (!$f->eof()) {
            $line = trim($f->getCurrentLine());
            if (empty($line) || $line[0] == '#' || strpos($line, ':') !== false) {
                continue;
            }
            if (ip2long($line) === false) {
                continue;
            }

            list($a, $b, $c, $d) = explode('.', $line);
            if (isset($ips[$a][$b][$c][$d])) {
                continue;
            }
            // Keyed by the host octet: de-duplicates and lets ksort() emit
            // the remaining hosts in numeric order.
            $ips[$a][$b][$c][$d] = $d;

            $network = "$a.$b.$c";
            if (!isset($nets[self::CLASS_C][$network])) {
                $nets[self::CLASS_C][$network] = 0;
            }
            $nets[self::CLASS_C][$network]++;
        }

        ksort($nets[self::CLASS_C]);
        ksort($ips);

        $this->reduceClassC($ipnets, $nets, $ips);
        $this->reduceClassB($ipnets, $nets, $ips);
        $this->updateIterator($ipnets, $ips);
    }

    /**
     * Combines the ipnets and ips into a single list
     *
     * Entries are appended to whatever the list already holds, networks
     * first (ordered by prefix length), then the remaining hosts. $ips is
     * emptied once its hosts have been copied.
     *
     * @param array $ipnets Networks keyed by prefix length
     * @param array $ips    Remaining hosts as $ips[a][b][c][] = d
     */
    protected function updateIterator(&$ipnets, &$ips)
    {
        ksort($ipnets);
        foreach ($ipnets as $mask => $networks) {
            foreach ($networks as $network) {
                // /24 entries are stored as "a.b.c"; always emit four octets.
                $this->list[] = $this->toDottedQuad($network) . "/$mask";
            }
        }

        // anything left in $ips by this point are actually hosts that need to be blocked
        ksort($ips);
        foreach ($ips as $a => $bs) {
            ksort($bs);
            foreach ($bs as $b => $cs) {
                ksort($cs);
                foreach ($cs as $c => $ds) {
                    ksort($ds);
                    foreach ($ds as $d) {
                        $this->list[] = "$a.$b.$c.$d";
                    }
                }
            }
        }
        $ips = [];

        // always reset since this was loaded
        $this->rewind();
    }

    /**
     * Writes the list out to the given file
     *
     * @param string $file
     */
    public function writeToFile($file)
    {
        $f = new SplFileObject($file, 'w');
        foreach ($this as $entry) {
            $f->fwrite("$entry\n");
        }
    }

    /**
     * Reduces any IP's in the Class C range to appropriate CIDR entries
     *
     * A /24 with all 256 hosts present is stored as "a.b.c" in $ipnets[24];
     * otherwise the hosts are merged into /25../31 networks where possible.
     *
     * @param array $ipnets Networks keyed by prefix length (filled here)
     * @param array $nets   Host counters as $nets[CLASS_C]["a.b.c"] = count
     * @param array $ips    Hosts as $ips[a][b][c][] = d; merged hosts are removed
     */
    protected function reduceClassC(&$ipnets, &$nets, &$ips)
    {
        if (empty($nets[self::CLASS_C])) {
            return;
        }

        // $network is shared by reference so the formatters always see the
        // /24 currently being processed by the loop below.
        $network      = null;
        $ipFormatter  = function ($host) use (&$network) {
            return "$network.$host";
        };
        $netFormatter = function ($n) use (&$network) {
            return substr_count($n, '.') == 3 ? $n : "$network.$n";
        };

        foreach ($nets[self::CLASS_C] as $network => $count) {
            list($a, $b, $c) = explode('.', $network);
            if (empty($ips[$a][$b][$c])) {
                continue;
            }

            if ($count == 256) {
                unset($ips[$a][$b][$c]);
                $ipnets[24][] = $network;
                continue;
            }

            $bin   = $this->processIps(self::CLASS_C, $ips[$a][$b][$c], $ipFormatter);
            $found = $this->reduceNetmasks(self::CLASS_C, $network, $bin, true);
            $this->removeNetsFromIps($found, $ipnets, $ips[$a][$b][$c], $netFormatter);
        }
    }

    /**
     * Reduces any IP's in the Class B range to appropriate CIDR entries
     *
     * Works on the complete /24 networks collected in $ipnets[24] and merges
     * them into /16../23 networks where every contained /24 is present.
     *
     * @param array $ipnets Networks keyed by prefix length
     * @param array $nets   Unused; kept for signature compatibility
     */
    protected function reduceClassB(&$ipnets, &$nets)
    {
        if (empty($ipnets[24])) {
            return;
        }

        natsort($ipnets[24]);

        // group the third octets of the /24s by their "a.b" prefix
        $thirdOctets = [];
        foreach ($ipnets[24] as $net) {
            list($a, $b, $c) = explode('.', $net);
            $thirdOctets["$a.$b"][] = $c;
        }

        $network      = null;
        $ipFormatter  = function ($host) use (&$network) {
            return "$network.$host.0";
        };
        $netFormatter = function ($n) {
            return "$n.0";
        };

        foreach ($thirdOctets as $network => $octets) {
            if (count($octets) == 256) {
                // every /24 of the /16 is present: record the /16 and drop
                // the /24 entries it covers
                $found = ["$network.0" => 16];
            } else {
                $bin   = $this->processIps(self::CLASS_B, $octets, $ipFormatter);
                $found = $this->reduceNetmasks(self::CLASS_B, $network, $bin, false);
            }
            $this->removeNetsFromIps($found, $ipnets, $ipnets[24], $netFormatter);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
// Example usage: read the hosts listed in bots.txt, collapse them and write
// the resulting CIDR/host list to converted-bots.txt.
$converter = new BlocklistDe('bots.txt');
$converter->writeToFile('converted-bots.txt');