Skip to content

Instantly share code, notes, and snippets.

@RevRagnarok
Last active December 1, 2018 22:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RevRagnarok/9aec1f2b3029b78165c2ac09230c4dc5 to your computer and use it in GitHub Desktop.
Save RevRagnarok/9aec1f2b3029b78165c2ac09230c4dc5 to your computer and use it in GitHub Desktop.
Block crazy IPs
#!/bin/env python
from __future__ import print_function
from collections import defaultdict
import os.path
import re
import sys
import urllib2
from HTMLParser import HTMLParser
"""
This script will pull IP ranges out of the "pre" sections of websites and manipulate them
to generate iptables rules. The output is STDOUT; do with it what you need.
Released as GPL by RevRagnarok 2018
"""
# The IP host pages
source_pages = ['https://www.wizcrafts.net/chinese-iptables-blocklist.html',
                'https://www.wizcrafts.net/nigerian-iptables-blocklist.html',
                'https://www.wizcrafts.net/russian-iptables-blocklist.html',
                'https://www.wizcrafts.net/lacnic-iptables-blocklist.html',
                ]
# What you need before and after the IP (this example is OpenWRT):
pre_text = 'iptables -A input_rule --src'
post_text = '-j DROP'
# Comment style:
# 0 = none
# 1 = last comment found in line (looks nice but more memory taken)
# 2 = just filename
comment_style = 2
# Comment prefix (if you want to easily be able to grep for statistics)
comment_prefix = 'XXX:'
#comment_prefix = ''
# Debugging
# If set to true, uses local copies of the files
local_debug = False
### End user-serviceable parts
# The iptables comment option is only emitted when comments are enabled.
comment_text = '-m comment --comment' if comment_style != 0 else ''
# Global regex for IP ranges: dotted quad with an optional /prefix.
# Raw string so the backslashes reach the regex engine untouched. The last
# octet is "\d+" (a lone "\d" only covered its first digit), and the pattern
# is anchored at the end so a line carrying anything after the address
# (e.g. shell metacharacters) is not accepted as an IP.
ip_re = re.compile(r"(\d+\.){3}\d+(/\d+)?\s*$")
# Global regex for lines to skip
blank_re = re.compile(r'^\s*$')
# Global page being parsed (for comment style 2)
page_name = ''
def print_loop_start():
    """Write the opening of a shell ``for`` loop to STDOUT.

    No newline is written, so the IP list can follow on the same line.
    """
    sys.stdout.write("for ip in ")
def print_loop_end(comment):
    """Close the shell ``for`` loop opened by print_loop_start().

    Prints the ``; do`` terminator for the IP list, one rule line built from
    the module-level ``pre_text`` / ``post_text`` around ``${ip}``, and the
    closing ``done``.

    comment -- text appended after ``post_text`` on the rule line (may be '').
    """
    rule_parts = (' ', pre_text, '${ip}', post_text, comment)
    print("; do")
    # print() joins the parts with single spaces, exactly like separate args.
    print(*rule_parts)
    print('done')
def print_ip_bash_loop(all_ips, max_loop_size=100, max_line_size=10):
    """Print shell ``for`` loops covering every IP in ``all_ips``.

    all_ips       -- dict mapping a comment string to a list of IP strings.
                     The lists are read, not modified.
    max_loop_size -- maximum number of IPs in one ``for`` loop.
    max_line_size -- maximum number of IPs on one output line; longer lists
                     are continued with a trailing backslash.

    For each comment the IPs are written last-to-first (the order the
    previous pop()-based version produced), split into loops of at most
    ``max_loop_size`` entries. Each loop is opened with print_loop_start()
    and closed with print_loop_end(comment).

    A comment whose list is empty produces no output, because an empty
    ``for ip in ; do`` is not valid shell.
    """
    for cmnt in all_ips:
        # Work on a reversed copy: same output order as popping from the end,
        # without draining the caller's list.
        ips = all_ips[cmnt][::-1]
        # Slicing gives every loop and line its exact size. The old running
        # counters were reset to 0 after a boundary although one IP had
        # already been emitted, so later loops carried one IP too many.
        for loop_start in range(0, len(ips), max_loop_size):
            loop_ips = ips[loop_start:loop_start + max_loop_size]
            print_loop_start()
            for line_start in range(0, len(loop_ips), max_line_size):
                if line_start:
                    # Continue the shell word list on a new line.
                    print('\\')
                    print(' ', end='')
                for ip in loop_ips[line_start:line_start + max_line_size]:
                    print(ip, '', end='')
            print_loop_end(cmnt)
# Whole-line validator for an IPv4 address or CIDR range. The downloaded text
# ends up verbatim in a shell script, so a line is accepted only when it
# consists of nothing but the address. The module-level ip_re is not anchored
# at the end and would let trailing shell syntax through.
_safe_ip_re = re.compile(r'^(?:\d{1,3}\.){3}\d{1,3}(?:/\d{1,2})?$')


def block_parser(data):
    """Parse the text of one 'pre' block and print it as shell loops.

    data -- the raw text found inside a 'pre' element.

    Each line is handled as follows:
      * blank lines are skipped;
      * lines starting with '#' are comments. With comment_style 1 the most
        recent one becomes the comment attached to the IPs that follow;
      * a line that is exactly an IPv4 address or CIDR range is collected;
      * anything else is reported on STDERR and skipped.

    The collected IPs are grouped by their comment string and handed to
    print_ip_bash_loop(). Nothing is returned.
    """
    # Store the data into a dict: comment string -> list of IPs
    all_ips = defaultdict(list)
    # Style 2 uses one fixed comment per page, derived from the page name.
    if 2 == comment_style:
        comment = '"' + comment_prefix + page_name.split('-')[0].capitalize() + '"'
    else:
        comment = ''
    for line in data.splitlines():
        if blank_re.match(line):  # Handle blanks
            continue
        entry = line.strip()
        if entry.startswith('#'):  # Update comment
            if 1 == comment_style:
                # Remove things the shell won't like. The comment is emitted
                # inside double quotes, where '$', backtick and backslash are
                # still interpreted, so those are dropped as well.
                cleaned = ''.join(c for c in entry[1:] if c not in '#\'":$`\\').lstrip()
                comment = '"' + comment_prefix + cleaned
                comment = comment.replace("follow", '').replace("IP addresses", '').rstrip() + '"'
            continue
        # Check if it looks like an IP or not
        if not _safe_ip_re.match(entry):
            print('"'+line+'" does not look like an IP!', file=sys.stderr)
            continue
        # What remains is what we want
        this_comment = comment_text + ' ' + comment if comment and comment_text else ''
        all_ips[this_comment].append(entry)
    # At end of loop, output the IP/comment pairs as a bash loop
    print_ip_bash_loop(all_ips)
class MyHTMLParser(HTMLParser):
    """HTML parser that collects the text of "pre" elements.

    The text of each "pre" element is buffered and passed to the block
    handler once, when the closing tag is seen. HTMLParser may deliver the
    text of a single element through several handle_data() calls (for
    example around a bare '&', or when the input arrives in several feed()
    calls); handing each chunk over separately could cut a line in two.

    Tags nested inside a "pre" element do not end collection; their text is
    part of the block.

    Call close() after the last feed() so an unterminated "pre" block is
    still processed.
    """

    def __init__(self, block_handler=None):
        """Create the parser.

        block_handler -- optional callable taking the text of one "pre"
                         block. When omitted, the module-level
                         block_parser() is used.
        """
        HTMLParser.__init__(self)
        self.in_pre = False
        self._pre_chunks = []
        self._block_handler = block_handler

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a "pre" tag opens."""
        if 'pre' == tag:
            if self.in_pre:
                # A previous block was never closed; process it before
                # starting the new one.
                self._flush_pre()
            self.in_pre = True

    def handle_endtag(self, tag):
        """Process the collected text when the "pre" tag closes."""
        if 'pre' == tag and self.in_pre:
            self.in_pre = False
            self._flush_pre()

    def handle_data(self, data):
        """Buffer text while inside a "pre" element; ignore it elsewhere."""
        if self.in_pre:
            self._pre_chunks.append(data)

    def close(self):
        """Finish parsing and process a "pre" block that was never closed."""
        HTMLParser.close(self)
        if self.in_pre:
            self.in_pre = False
            self._flush_pre()

    def _flush_pre(self):
        """Hand the buffered text to the block handler and clear the buffer."""
        text = ''.join(self._pre_chunks)
        self._pre_chunks = []
        if not text:
            return
        # block_parser is looked up here, at call time, so the class itself
        # does not depend on definition order.
        if self._block_handler is not None:
            self._block_handler(text)
        else:
            block_parser(text)
# main
# Emit a shell script on STDOUT: a shebang, then for every source page a
# header comment followed by the loops block_parser() prints while the page
# is being parsed. Progress messages go to STDERR.
print("#!/bin/sh")
for page in source_pages:
    # block_parser() reads this module-level name for comment style 2.
    page_name = page.split('/')[-1]
    print("#", page_name, "follows:")
    # Use a fresh parser per page so unparsed leftovers or an open "pre"
    # state from one page cannot leak into the next.
    parser = MyHTMLParser()
    if local_debug:
        print("Faking", page, file=sys.stderr)
        with open(page_name) as f:
            parser.feed(f.read())
    else:
        print("Pulling", page, file=sys.stderr)
        response = urllib2.urlopen(page)
        try:
            parser.feed(response.read())
        finally:
            # Release the connection even if reading or parsing fails.
            response.close()
    # Make the parser process any input it is still holding back.
    parser.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment