Last active
December 1, 2018 22:17
-
-
Save RevRagnarok/9aec1f2b3029b78165c2ac09230c4dc5 to your computer and use it in GitHub Desktop.
Block crazy IPs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python | |
from __future__ import print_function | |
from collections import defaultdict | |
import os.path | |
import re | |
import sys | |
import urllib2 | |
from HTMLParser import HTMLParser | |
""" | |
This script will pull IP ranges out of the "pre" sections of websites and manipulate them | |
to generate iptables rules. The output is STDOUT; do with it what you need. | |
Released as GPL by RevRagnarok 2018 | |
""" | |
# The IP host pages | |
source_pages = ['https://www.wizcrafts.net/chinese-iptables-blocklist.html', | |
'https://www.wizcrafts.net/nigerian-iptables-blocklist.html', | |
'https://www.wizcrafts.net/russian-iptables-blocklist.html', | |
'https://www.wizcrafts.net/lacnic-iptables-blocklist.html', | |
] | |
# What you need before and after the IP (this example is OpenWRT): | |
pre_text = 'iptables -A input_rule --src' | |
post_text = '-j DROP' | |
# Comment style: | |
# 0 = none | |
# 1 = last comment found in line (looks nice but more memory taken) | |
# 2 = just filename | |
comment_style = 2 | |
# Comment prefix (if you want to easily be able to grep for statistics) | |
comment_prefix = 'XXX:' | |
#comment_prefix = '' | |
# Debugging | |
# If set to true, uses local copies of the files | |
local_debug = False | |
### End user-serviceable parts | |
comment_text = '-m comment --comment' if comment_style != 0 else '' | |
# Global regex for IP ranges | |
ip_re = re.compile("(\d+\.){3}\d(/\d+)?") | |
# Global regex for lines to skip | |
blank_re = re.compile('^\s*$') | |
# Global page being parsed (for comment style 2) | |
page_name = '' | |
def print_loop_start():
    """Emit the opening of a bash for-loop over IPs, with no trailing newline."""
    sys.stdout.write("for ip in ")
def print_loop_end(comment):
    """Close the bash loop opened by print_loop_start().

    Emits '; do', one iptables command using ${ip} (with the given
    comment flags appended), and the terminating 'done'.
    """
    rule = ' '.join((' ', pre_text, '${ip}', post_text, comment))
    print('; do')
    print(rule)
    print('done')
def print_ip_bash_loop(all_ips):
    """Write the collected IPs to stdout as bash for-loops.

    all_ips maps a comment string to a list of IP strings; each list is
    consumed (popped from the end) while its loop is being emitted. Long
    loops are split and long lines get bash continuations.
    """
    loop_limit = 100  # max IPs emitted per bash loop
    line_limit = 10   # max IPs per output line before a continuation
    for comment, ips in all_ips.items():
        # print("Found", len(ips), "IPs for '"+comment+"'", file=sys.stderr)
        print_loop_start()
        in_loop = 0
        on_line = 0
        while ips:
            on_line += 1
            in_loop += 1
            if in_loop > loop_limit:
                # Too many IPs for one loop: close it and open a fresh one.
                print_loop_end(comment)
                print_loop_start()
                in_loop = 0
                on_line = 0
            elif on_line > line_limit:
                # Bash line continuation, then indent the next line.
                print('\\')
                print(' ', end='')
                on_line = 1
            print(ips.pop(), '', end='')
        print_loop_end(comment)
def block_parser(data):
    """Parse one 'pre' block of text pulled from the HTML.

    Collects IP/CIDR lines into a dict keyed by their iptables comment
    flags, then hands the dict to print_ip_bash_loop(). Lines that are
    neither blank, comments, nor IP-like are reported on stderr.
    """
    grouped = defaultdict(list)
    # Style 2: derive the comment once from the page filename (module global).
    if comment_style == 2:
        comment = '"{0}{1}"'.format(comment_prefix,
                                    page_name.split('-')[0].capitalize())
    else:
        comment = ''
    # NOTE: one line in one file trips the parser because HTMLParser does not
    # like a bare '&'. Oh well.
    for line in data.splitlines():
        if blank_re.match(line):
            continue  # skip blank / whitespace-only lines
        if line[0] == '#':
            # Style 1: the most recent '#' comment labels the IPs below it.
            if comment_style == 1:
                cleaned = ''.join(c for c in line[1:] if c not in '#\'":').lstrip()
                cleaned = '"' + comment_prefix + cleaned
                comment = cleaned.replace("follow", '').replace("IP addresses", '').rstrip() + '"'
            continue
        if not ip_re.match(line):
            print('"'+line+'" does not look like an IP!', file=sys.stderr)
            continue
        # What remains is an IP/range we want; group it under its comment.
        key = comment_text + ' ' + comment if comment and comment_text else ''
        grouped[key].append(line)
    # Emit the IP/comment pairs as bash loops (original = 318719, final = 67181)
    print_ip_bash_loop(grouped)
class MyHTMLParser(HTMLParser):
    """HTML parser that forwards the text of every 'pre' element to block_parser()."""

    def __init__(self):
        # Old-style base-class call kept for Python 2 compatibility.
        HTMLParser.__init__(self)
        # True only while the most recent start tag was <pre>.
        self.in_pre = False

    def handle_starttag(self, tag, attrs):
        # Any start tag refreshes the flag; only <pre> turns it on.
        self.in_pre = (tag == 'pre')

    def handle_endtag(self, tag):
        # Any end tag leaves <pre> mode.
        self.in_pre = False

    def handle_data(self, data):
        if not self.in_pre:
            return
        block_parser(data)
# main: emit a shell script header, then process each source page.
print("#!/bin/sh")
parser = MyHTMLParser()
for page in source_pages:
    # block_parser() reads page_name (module global) for comment style 2.
    page_name = page.split('/')[-1]
    print("#", page_name, "follows:")
    if local_debug:
        # Debug mode: parse a previously downloaded local copy of the page.
        print("Faking", page, file=sys.stderr)
        with open(page_name) as f:
            parser.feed(f.read())
    else:
        print("Pulling", page, file=sys.stderr)
        # Close the HTTP response explicitly — urllib2 handles are not
        # context managers on Python 2, and the original leaked them.
        response = urllib2.urlopen(page)
        try:
            parser.feed(response.read())
        finally:
            response.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment