Skip to content

Instantly share code, notes, and snippets.

@dlundgren
Created January 29, 2015 17:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dlundgren/cd0d3737fce8c83f86ee to your computer and use it in GitHub Desktop.
Save dlundgren/cd0d3737fce8c83f86ee to your computer and use it in GitHub Desktop.
Converts a Blocklist.de text file from a plain list of IPs into a list with the CIDR ranges at the top and the remaining individual IPs at the bottom.
<?php
/**
 * Class BlocklistDe
 *
 * Parses a blocklist.de text file (one IPv4 address per line) and collapses
 * runs of addresses that completely fill a network into CIDR notation.
 *
 * After loading, iterating the object yields the CIDR entries first (ordered
 * from the largest network to the smallest) followed by every host that could
 * not be merged into a network.
 *
 * Only complete networks between /16 and /31 are produced; lines that are not
 * valid IPv4 addresses (comments, blanks, IPv6) are ignored.
 *
 * @author David Lundgren
 * @license MIT
 */
class BlocklistDe
    implements Iterator
{
    const CLASS_A = 1;
    const CLASS_B = 2;
    const CLASS_C = 3;

    /**
     * @var array List of ips and cidr's
     */
    protected $list = [];

    /**
     * List of netmasks for each class, keyed by prefix length.
     *
     * The keys must stay in ascending order: larger networks have to be
     * evaluated before the smaller networks they contain.
     *
     * @var array
     */
    private $netmasks = [
        self::CLASS_C => [
            25 => '255.255.255.128',
            26 => '255.255.255.192',
            27 => '255.255.255.224',
            28 => '255.255.255.240',
            29 => '255.255.255.248',
            30 => '255.255.255.252',
            31 => '255.255.255.254',
        ],
        self::CLASS_B => [
            17 => '255.255.128.0',
            18 => '255.255.192.0',
            19 => '255.255.224.0',
            20 => '255.255.240.0',
            21 => '255.255.248.0',
            22 => '255.255.252.0',
            23 => '255.255.254.0',
        ],
    ];

    /**
     * @var int Current iterator position
     */
    private $pos = 0;

    /**
     * @param string|null $file Optional blocklist file to load immediately
     */
    public function __construct($file = null)
    {
        if ($file) {
            $this->loadFromFile($file);
        }
    }

    /** Iterator **/

    // The attribute lines below are plain comments before PHP 8 and silence
    // the "return type" deprecation raised for Iterator methods on PHP 8.1+.

    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->list[$this->pos];
    }

    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->pos;
    }

    #[\ReturnTypeWillChange]
    public function next()
    {
        ++$this->pos;
    }

    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->pos = 0;
    }

    #[\ReturnTypeWillChange]
    public function valid()
    {
        return isset($this->list[$this->pos]);
    }

    /** custom **/

    /**
     * Returns the networks of one prefix length that are completely filled.
     *
     * @param string $prefix       Leading octets shared by every candidate (e.g. "10.0.0" or "10.0")
     * @param array  $nets         Entry counts keyed by the network's value in the varying octet
     * @param int    $cidr         Prefix length being evaluated
     * @param bool   $netsAreHosts True when the counts are hosts, false when they are /24 networks
     * @return array List of "prefix.octet/cidr" strings
     */
    private function reduceCidr($prefix, $nets, $cidr, $netsAreHosts)
    {
        // Size of one network of this prefix length, measured in the varying octet.
        $blockSize = 1 << (8 - ($cidr % 8));
        $required  = $netsAreHosts ? (1 << (32 - $cidr)) : $blockSize;

        $reduced = [];
        // Network boundaries are multiples of the block size, so that is the step.
        for ($j = 0; $j < 256; $j += $blockSize) {
            if (isset($nets[$j]) && $nets[$j] == $required) {
                $reduced[] = "$prefix.$j/$cidr";
            }
        }

        return $reduced;
    }

    /**
     * Returns whether or not the ip is in the given network
     *
     * @param string $ip      Dotted-quad address to test
     * @param string $network Dotted-quad network address
     * @param int    $mask    Prefix length of the network
     * @return bool
     */
    private function inNetwork($ip, $network, $mask)
    {
        $ipLong  = ip2long($ip);
        $netLong = ip2long($network);
        if ($ipLong === false || $netLong === false) {
            // Without this guard an unparsable address would compare as 0 == false.
            return false;
        }

        return ($ipLong & ~((1 << (32 - $mask)) - 1)) == $netLong;
    }

    /**
     * Pads a partial address ("10.0" or "10.0.3") with zero octets to a dotted quad.
     *
     * @param string $address
     * @return string
     */
    private function toDottedQuad($address)
    {
        $missing = 3 - substr_count($address, '.');

        return $missing > 0 ? $address . str_repeat('.0', $missing) : $address;
    }

    /**
     * Counts, for every prefix length of the class, how many entries fall into each network.
     *
     * @param int      $class     One of the CLASS_* constants
     * @param array    $ips       List of the entries to process (sorted in place)
     * @param \Closure $formatter Turns an entry into a dotted-quad address
     * @return array Counts indexed as [cidr][network octet]
     */
    private function processIps($class, &$ips, \Closure $formatter)
    {
        natsort($ips);

        $bin = [];
        foreach ($ips as $host) {
            $ip = ip2long($formatter($host));
            if ($ip === false) {
                continue;
            }
            foreach (array_keys($this->netmasks[$class]) as $cidr) {
                $octets = explode('.', long2ip($ip & ~((1 << (32 - $cidr)) - 1)));
                // The CLASS_* value doubles as the index of the octet that varies
                // within the class: 2 => third octet, 3 => fourth octet.
                $key = $octets[$class];
                $bin[$cidr][$key] = isset($bin[$cidr][$key]) ? $bin[$cidr][$key] + 1 : 1;
            }
        }

        return $bin;
    }

    /**
     * Reduce the CIDR bin into a list of non-overlapping networks
     *
     * @param int    $class    One of the CLASS_* constants
     * @param string $prefix   Prefix for the super net
     * @param array  $bin      The cidr bin produced by processIps
     * @param bool   $useHosts Whether or not the CIDR should use host or subnet counting
     * @return array Prefix lengths keyed by dotted-quad network address
     */
    private function reduceNetmasks($class, $prefix, &$bin, $useHosts)
    {
        $nets = [];
        foreach (array_keys($this->netmasks[$class]) as $cidr) {
            if (empty($bin[$cidr])) {
                continue;
            }
            foreach ($this->reduceCidr($prefix, $bin[$cidr], $cidr, $useHosts) as $ipmask) {
                list($ip, $length) = explode('/', $ipmask);
                // Class B candidates only carry three octets; ip2long needs all four.
                $ip = $this->toDottedQuad($ip);

                $covered = false;
                foreach ($nets as $n => $m) {
                    if ($this->inNetwork($ip, $n, $m)) {
                        $covered = true;
                        break;
                    }
                }
                if (!$covered) {
                    $nets[$ip] = (int) $length;
                }
            }
        }

        return $nets;
    }

    /**
     * Records the networks and removes every entry they cover
     *
     * @param array    $nets      Prefix lengths keyed by dotted-quad network (from reduceNetmasks)
     * @param array    $ipnets    Networks found so far, indexed by prefix length
     * @param array    $ips       Entries to filter; covered entries are unset
     * @param \Closure $formatter Turns an entry of $ips into a dotted-quad address
     */
    private function removeNetsFromIps($nets, &$ipnets, &$ips, \Closure $formatter)
    {
        foreach ($nets as $network => $mask) {
            $ipnets[$mask][] = $network;
            foreach (array_keys($ips) as $k) {
                if ($this->inNetwork($formatter($ips[$k]), $network, $mask)) {
                    unset($ips[$k]);
                }
            }
        }
    }

    /**
     * Loads the file into the list, replacing anything loaded before
     *
     * @param string $file Path of the blocklist text file
     * @throws \InvalidArgumentException when the file is missing or unreadable
     */
    public function loadFromFile($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            throw new \InvalidArgumentException("$file does not exist or is not readable.");
        }

        $nets   = [self::CLASS_C => []];
        $ips    = [];
        $ipnets = [];

        foreach (new SplFileObject($file) as $line) {
            $line = trim((string) $line);
            if ($line === '' || $line[0] == '#') {
                continue;
            }
            // Skips IPv6 and anything else that is not a plain IPv4 address.
            if (filter_var($line, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) === false) {
                continue;
            }

            list($a, $b, $c, $d) = array_map('intval', explode('.', $line));
            if (isset($ips[$a][$b][$c][$d])) {
                // A duplicate line must not inflate the per-network counts.
                continue;
            }
            $ips[$a][$b][$c][$d] = $d;

            $network = "$a.$b.$c";
            if (!isset($nets[self::CLASS_C][$network])) {
                $nets[self::CLASS_C][$network] = 0;
            }
            $nets[self::CLASS_C][$network]++;
        }

        ksort($nets[self::CLASS_C], SORT_NATURAL);
        ksort($ips);

        $this->reduceClassC($ipnets, $nets, $ips);
        $this->reduceClassB($ipnets, $nets);
        $this->updateIterator($ipnets, $ips);
    }

    /**
     * Combines the ipnets and ips into a single list
     *
     * @param array $ipnets Dotted-quad networks indexed by prefix length
     * @param array $ips    Remaining hosts as [a][b][c] => list of last octets; emptied on return
     */
    protected function updateIterator(&$ipnets, &$ips)
    {
        // reset the list so that a reload does not append to the previous result
        $this->list = [];

        ksort($ipnets);
        foreach ($ipnets as $mask => $networks) {
            natsort($networks);
            foreach ($networks as $network) {
                $this->list[] = "$network/$mask";
            }
        }

        // anything left in $ips by this point are actually hosts that need to be blocked
        ksort($ips);
        foreach ($ips as $a => $bs) {
            ksort($bs);
            foreach ($bs as $b => $cs) {
                ksort($cs);
                foreach ($cs as $c => $ds) {
                    // order by the octet value, the keys carry no ordering guarantee
                    sort($ds);
                    foreach ($ds as $d) {
                        $this->list[] = "$a.$b.$c.$d";
                    }
                }
            }
        }
        $ips = [];

        // always reset since this was loaded
        $this->rewind();
    }

    /**
     * Writes the list out to the given file
     *
     * @param string $file Destination path, one entry per line
     */
    public function writeToFile($file)
    {
        $f = new SplFileObject($file, 'w');
        foreach ($this as $entry) {
            $f->fwrite("$entry\n");
        }
    }

    /**
     * Reduces any IP's in the Class C range to appropriate CIDR entries
     *
     * @param array $ipnets Networks found so far, indexed by prefix length
     * @param array $nets   Host counts as [CLASS_C]["a.b.c"] => count
     * @param array $ips    Hosts as [a][b][c] => list of last octets; merged hosts are removed
     */
    protected function reduceClassC(&$ipnets, &$nets, &$ips)
    {
        if (empty($nets[self::CLASS_C])) {
            return;
        }

        foreach ($nets[self::CLASS_C] as $network => $count) {
            list($a, $b, $c) = explode('.', $network);

            if ($count == 256) {
                // every host is listed: the whole /24 replaces them
                unset($ips[$a][$b][$c]);
                $ipnets[24][] = "$network.0";
                continue;
            }
            if (empty($ips[$a][$b][$c])) {
                continue;
            }

            $formatter = function ($host) use ($network) {
                return "$network.$host";
            };

            $bin   = $this->processIps(self::CLASS_C, $ips[$a][$b][$c], $formatter);
            $found = $this->reduceNetmasks(self::CLASS_C, $network, $bin, true);
            $this->removeNetsFromIps($found, $ipnets, $ips[$a][$b][$c], $formatter);
        }
    }

    /**
     * Reduces any complete /24 networks in the Class B range to larger CIDR entries
     *
     * @param array $ipnets Networks found so far, indexed by prefix length; merged /24s are removed
     * @param array $nets   Unused, retained for signature compatibility
     */
    protected function reduceClassB(&$ipnets, &$nets)
    {
        if (empty($ipnets[24])) {
            return;
        }
        natsort($ipnets[24]);

        // group the third octets of the /24 networks by their "a.b" prefix
        $thirdOctets = [];
        foreach ($ipnets[24] as $net) {
            list($a, $b, $c) = explode('.', $net);
            $thirdOctets["$a.$b"][] = $c;
        }

        // the /24 entries are already dotted quads
        $identity = function ($net) {
            return $net;
        };

        foreach ($thirdOctets as $network => $octets) {
            if (count($octets) == 256) {
                // all 256 /24 networks are present: the whole /16 replaces them
                $found = ["$network.0.0" => 16];
            } else {
                $formatter = function ($octet) use ($network) {
                    return "$network.$octet.0";
                };
                $bin   = $this->processIps(self::CLASS_B, $octets, $formatter);
                $found = $this->reduceNetmasks(self::CLASS_B, $network, $bin, false);
            }
            $this->removeNetsFromIps($found, $ipnets, $ipnets[24], $identity);
        }
    }
}
@dlundgren
Copy link
Author

// Usage: parse the raw blocklist.de export, then write the condensed list
// (CIDR entries first, remaining hosts after) to a new file.
$converter = new BlocklistDe();
$converter->loadFromFile('bots.txt');
$converter->writeToFile('converted-bots.txt');

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment