Created
November 30, 2012 02:54
-
-
Save averagesecurityguy/4173509 to your computer and use it in GitHub Desktop.
Find Domains That Do Not Match Amazon EC2 Name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib
import os
import socket
import sys

import requests
def get_html(domain):
    """Fetch the page served at ``http://<domain>``.

    Args:
        domain: Host name to request, or None.

    Returns:
        The response body as text when the server answers with status 200.
        None when ``domain`` is None, when the status is anything other than
        200, or when the request fails; a short reason is printed for
        failures.
    """
    if domain is None:
        return None

    try:
        print('\tGetting {0}'.format(domain))
        response = requests.get('http://' + domain, timeout=30.0)
    except requests.exceptions.Timeout:
        print('Timeout')
    except requests.exceptions.ConnectionError:
        print('Connection Error')
    except requests.exceptions.TooManyRedirects:
        print('Too Many Redirects')
    except requests.exceptions.InvalidURL:
        print('Invalid URL')
    except requests.exceptions.RequestException as err:
        # Catch-all for the remaining requests failures so that one bad host
        # cannot abort a whole batch run.
        print('Request Error: {0}'.format(err))
    except socket.timeout:
        print('Socket Timeout')
    except socket.error:
        print('Socket Error')
    else:
        if response.status_code == 200:
            return response.text

    # Request failed or returned a non-200 status.
    return None
def check_match(domain, ec2_name):
    """Compare the pages served by two hosts and record the verdict.

    Both pages are fetched with get_html(). When both return content, their
    similarity is measured with difflib.SequenceMatcher; a ratio below 0.4 is
    treated as "do not match". The verdict is written to the module-level
    ``textfile``, mismatches are additionally written as links to the
    module-level ``htmlfile``, and progress is printed to stdout.

    Args:
        domain: Host name of the site under test.
        ec2_name: Host name to compare it against.

    Returns:
        None.
    """
    print('Checking for match between {0} and {1}.'.format(domain, ec2_name))
    d = get_html(domain)
    e = get_html(ec2_name)

    if d is None or e is None:
        print('\tOne site returned no data.\n')
        return

    threshold = 0.4
    s = difflib.SequenceMatcher(None, d, e)

    # real_quick_ratio() and quick_ratio() are only upper bounds on ratio()
    # (the former looks at nothing but the two lengths).  Use them as cheap
    # early rejections and let ratio() make the final decision, so that two
    # unrelated pages of similar size are no longer reported as matching.
    differ = (s.real_quick_ratio() < threshold
              or s.quick_ratio() < threshold
              or s.ratio() < threshold)

    if differ:
        textfile.write('Sites {0} and {1} do not match.\n'.format(domain, ec2_name))
        htmlfile.write('Sites <a href="http://{0}">{0}</a> and '.format(domain))
        htmlfile.write('<a href="http://{0}">{0}</a>'.format(ec2_name))
        htmlfile.write(' do not match<br />\n')
        print('\tSites Do Not Match\n')
    else:
        textfile.write('Sites {0} and {1} match.\n'.format(domain, ec2_name))
        print('\tSites Match\n')
###############################################################################
#                                MAIN PROGRAM                                 #
###############################################################################
# Script entry: read "<domain> <ec2_name>" pairs from the file named on the
# command line and compare each pair with check_match().
if len(sys.argv) != 2:
    print('Usage: ec2_match.py domain_ec2_file')
    sys.exit(1)

filename = sys.argv[1]

# Strip only the final extension so that paths containing other dots
# (e.g. ./lists/a.b.txt) keep their directory and stem.
basename = os.path.splitext(filename)[0]

# check_match() writes to these two handles through their module-level names,
# so they must stay global and keep these exact names.
textfile = open(basename + '_match.txt', 'w')
htmlfile = open(basename + '_no_match.html', 'w')
try:
    htmlfile.write('<!doctype html>\n<html><head></head><body>\n')

    with open(filename) as infile:
        for line in infile:
            fields = line.split()
            if len(fields) != 2:
                # Blank or malformed line: report it (if non-empty) and move
                # on instead of crashing on the tuple unpacking.
                if fields:
                    print('Skipping malformed line: {0}'.format(line.rstrip()))
                continue

            domain, ec2_name = fields
            check_match(domain, ec2_name)

            # Flush after every pair so partial results survive an
            # interrupted run.
            textfile.flush()
            htmlfile.flush()

    # Close the document properly (the opening tags were written above).
    htmlfile.write('</body></html>\n')
finally:
    textfile.close()
    htmlfile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment