# averagesecurityguy/ec2_match.py

## ec2_match.py
import os
import sys
import difflib
import requests
import socket

def get_html(domain):
    """Fetch ``http://<domain>`` and return the response body as text.

    Args:
        domain: Host name (without scheme) to request, or None.

    Returns:
        The body text when the server answers with HTTP 200. None when
        *domain* is None or empty, when any other status is returned, or
        when the request fails. Failures are reported on stdout and are
        never raised to the caller.
    """
    if not domain:
        return None

    print('\tGetting {0}'.format(domain))
    try:
        response = requests.get('http://' + domain, timeout=30.0)
    except requests.exceptions.Timeout:
        print('Timeout')
        return None
    except requests.exceptions.ConnectionError:
        print('Connection Error')
        return None
    except requests.exceptions.TooManyRedirects:
        print('Too Many Redirects')
        return None
    except requests.exceptions.InvalidURL:
        print('Invalid URL')
        return None
    except requests.exceptions.RequestException as err:
        # Base class of the requests exceptions: keeps one odd host from
        # aborting the whole scan with an unhandled error.
        print('Request Error: {0}'.format(err))
        return None
    except socket.timeout:
        print('Socket Timeout')
        return None
    except socket.error:
        print('Socket Error')
        return None

    if response.status_code != 200:
        # Say why there is no data instead of silently returning None.
        print('\tHTTP status {0}'.format(response.status_code))
        return None

    return response.text


def check_match(domain, ec2_name):
    """Compare the pages served by *domain* and *ec2_name* and log the result.

    Both pages are fetched with get_html(). Each completed comparison is
    recorded in the module-level ``textfile``; pairs judged different are
    additionally written to the module-level ``htmlfile`` as links. When
    either fetch returns None, nothing is written to the files.
    """
    print('Checking for match between {0} and {1}.'.format(domain, ec2_name))
    domain_page = get_html(domain)
    ec2_page = get_html(ec2_name)

    if domain_page is None or ec2_page is None:
        print('\tOne site returned no data.\n')
        return

    # real_quick_ratio() is only a fast upper bound on the true similarity
    # ratio, so reaching the threshold means "not obviously different".
    matcher = difflib.SequenceMatcher(None, domain_page, ec2_page)
    if matcher.real_quick_ratio() >= 0.4:
        textfile.write('Sites {0} and {1} match.\n'.format(domain, ec2_name))
        print('\tSites Match\n')
        return

    textfile.write('Sites {0} and {1} do not match.\n'.format(domain, ec2_name))
    htmlfile.write(''.join([
        'Sites <a href="http://{0}">{0}</a> and '.format(domain),
        '<a href="http://{0}">{0}</a>'.format(ec2_name),
        ' do not match<br />\n',
    ]))
    print('\tSites Do Not Match\n')


###############################################################################
#  MAIN PROGRAM                                                               #
###############################################################################
def parse_pair(line):
    """Split one input line into a ``(domain, ec2_name)`` tuple.

    Fields may be separated by any run of whitespace. Returns None for
    blank lines and for lines that do not hold exactly two fields, so a
    malformed line is skipped instead of aborting the run.
    """
    fields = line.split()
    if len(fields) != 2:
        return None
    return fields[0], fields[1]


def output_base(filename):
    """Return *filename* without its final extension.

    Unlike ``filename.split('.')[0]`` this keeps leading directories such
    as ``./data/`` and any earlier dots in the name intact.
    """
    return os.path.splitext(filename)[0]


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: ec2_match.py domain_ec2_file')
        sys.exit(1)

    filename = sys.argv[1]
    base = output_base(filename)

    # check_match() writes to the module-level names textfile and htmlfile,
    # so they are bound here at module scope. The with-statement closes all
    # three files even if a check raises part-way through.
    with open(filename) as infile, \
            open(base + '_match.txt', 'w') as textfile, \
            open(base + '_no_match.html', 'w') as htmlfile:
        htmlfile.write('<!doctype html>\n<html><head></head><body>\n')

        for line in infile:
            pair = parse_pair(line)
            if pair is None:
                continue
            domain, ec2_name = pair
            check_match(domain, ec2_name)
            # Flush after every pair so results reach disk while the scan
            # is still running.
            textfile.flush()
            htmlfile.flush()

        # Close the document properly (the footer used to re-open <body>).
        htmlfile.write('</body></html>\n')
	import os
	import sys
	import difflib
	import requests
	import socket

	def get_html(domain):
	    """Fetch ``http://<domain>`` and return the response body as text.

	    Args:
	        domain: Host name (without scheme) to request, or None.

	    Returns:
	        The body text when the server answers with HTTP 200. None when
	        *domain* is None or empty, when any other status is returned, or
	        when the request fails. Failures are reported on stdout and are
	        never raised to the caller.
	    """
	    if not domain:
	        return None

	    print('\tGetting {0}'.format(domain))
	    try:
	        response = requests.get('http://' + domain, timeout=30.0)
	    except requests.exceptions.Timeout:
	        print('Timeout')
	        return None
	    except requests.exceptions.ConnectionError:
	        print('Connection Error')
	        return None
	    except requests.exceptions.TooManyRedirects:
	        print('Too Many Redirects')
	        return None
	    except requests.exceptions.InvalidURL:
	        print('Invalid URL')
	        return None
	    except requests.exceptions.RequestException as err:
	        # Base class of the requests exceptions: keeps one odd host from
	        # aborting the whole scan with an unhandled error.
	        print('Request Error: {0}'.format(err))
	        return None
	    except socket.timeout:
	        print('Socket Timeout')
	        return None
	    except socket.error:
	        print('Socket Error')
	        return None

	    if response.status_code != 200:
	        # Say why there is no data instead of silently returning None.
	        print('\tHTTP status {0}'.format(response.status_code))
	        return None

	    return response.text


	def check_match(domain, ec2_name):
	    """Compare the pages served by *domain* and *ec2_name* and log the result.

	    Both pages are fetched with get_html(). Each completed comparison is
	    recorded in the module-level ``textfile``; pairs judged different are
	    additionally written to the module-level ``htmlfile`` as links. When
	    either fetch returns None, nothing is written to the files.
	    """
	    print('Checking for match between {0} and {1}.'.format(domain, ec2_name))
	    domain_page = get_html(domain)
	    ec2_page = get_html(ec2_name)

	    if domain_page is None or ec2_page is None:
	        print('\tOne site returned no data.\n')
	        return

	    # real_quick_ratio() is only a fast upper bound on the true similarity
	    # ratio, so reaching the threshold means "not obviously different".
	    matcher = difflib.SequenceMatcher(None, domain_page, ec2_page)
	    if matcher.real_quick_ratio() >= 0.4:
	        textfile.write('Sites {0} and {1} match.\n'.format(domain, ec2_name))
	        print('\tSites Match\n')
	        return

	    textfile.write('Sites {0} and {1} do not match.\n'.format(domain, ec2_name))
	    htmlfile.write(''.join([
	        'Sites <a href="http://{0}">{0}</a> and '.format(domain),
	        '<a href="http://{0}">{0}</a>'.format(ec2_name),
	        ' do not match<br />\n',
	    ]))
	    print('\tSites Do Not Match\n')


	###############################################################################
	# MAIN PROGRAM #
	###############################################################################
	def parse_pair(line):
	    """Split one input line into a ``(domain, ec2_name)`` tuple.

	    Fields may be separated by any run of whitespace. Returns None for
	    blank lines and for lines that do not hold exactly two fields, so a
	    malformed line is skipped instead of aborting the run.
	    """
	    fields = line.split()
	    if len(fields) != 2:
	        return None
	    return fields[0], fields[1]


	def output_base(filename):
	    """Return *filename* without its final extension.

	    Unlike ``filename.split('.')[0]`` this keeps leading directories such
	    as ``./data/`` and any earlier dots in the name intact.
	    """
	    return os.path.splitext(filename)[0]


	if __name__ == '__main__':
	    if len(sys.argv) != 2:
	        print('Usage: ec2_match.py domain_ec2_file')
	        sys.exit(1)

	    filename = sys.argv[1]
	    base = output_base(filename)

	    # check_match() writes to the module-level names textfile and htmlfile,
	    # so they are bound here at module scope. The with-statement closes all
	    # three files even if a check raises part-way through.
	    with open(filename) as infile, \
	            open(base + '_match.txt', 'w') as textfile, \
	            open(base + '_no_match.html', 'w') as htmlfile:
	        htmlfile.write('<!doctype html>\n<html><head></head><body>\n')

	        for line in infile:
	            pair = parse_pair(line)
	            if pair is None:
	                continue
	            domain, ec2_name = pair
	            check_match(domain, ec2_name)
	            # Flush after every pair so results reach disk while the scan
	            # is still running.
	            textfile.flush()
	            htmlfile.flush()

	        # Close the document properly (the footer used to re-open <body>).
	        htmlfile.write('</body></html>\n')