Second-order subdomain takeover
# coding=utf-8
# python3
from urllib.parse import urlparse

import requests
import urllib3
from bs4 import BeautifulSoup

# Disable SSL insecure warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Timeout for all HTTP requests
GLOBAL_HTTP_TIMEOUT = 7

# Set User-Agent for "OPSEC"
UA = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
}
def normalize_url(domain, src):
    '''
    (Try to) Normalize a URL to its absolute form
    '''
    src = src.strip()
    src = src.rstrip('/')

    # Protocol-relative URL
    if src.startswith('//'):
        return 'http:{}'.format(src)

    # Root-relative URL starting with /
    if src.startswith('/'):
        return 'http://{}{}'.format(domain, src)

    # Relative URL starting with ? (query string only)
    if src.startswith('?'):
        return 'http://{}/{}'.format(domain, src)

    # Relative URL starting with ./
    if src.startswith('./'):
        return 'http://{}{}'.format(domain, src[1:])

    # Absolute URL
    if src.startswith('https://') or src.startswith('http://'):
        return src

    # Else let's hope it is a relative URL
    return 'http://{}/{}'.format(domain, src)
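
# Illustrative examples of the normalization above (the domains and paths
# are hypothetical, not from the original gist):
#   normalize_url('example.com', '//cdn.example.net/app.js')  -> 'http://cdn.example.net/app.js'
#   normalize_url('example.com', '/static/app.js')            -> 'http://example.com/static/app.js'
#   normalize_url('example.com', 'https://cdn.example.net/x') -> 'https://cdn.example.net/x'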
def extract_javascript(domain, source_code):
    '''
    Extract and normalize external JavaScript files from HTML
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    scripts = [normalize_url(domain, s.get('src')) for s in tree.find_all('script') if s.get('src')]
    return list(set(scripts))
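
# Illustrative example (hypothetical markup): for a page on example.com
# containing <script src="js/app.js"></script>, this returns
# ['http://example.com/js/app.js'].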
def extract_links(domain, source_code):
    '''
    Extract and normalize links (<a href>) in an HTML file
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    hrefs = [normalize_url(domain, s.get('href')) for s in tree.find_all('a') if s.get('href')]
    return list(set(hrefs))

def extract_styles(domain, source_code):
    '''
    Extract and normalize CSS and other <link href> resources in an HTML file
    '''
    tree = BeautifulSoup(source_code, 'html.parser')
    hrefs = [normalize_url(domain, s.get('href')) for s in tree.find_all('link') if s.get('href')]
    return list(set(hrefs))
def extract_cors(headers):
    '''
    Extract origins allowed by the Access-Control-Allow-Origin header
    '''
    cors = headers.get('Access-Control-Allow-Origin', '')
    if not cors or '*' in cors:
        # No header, or a wildcard allowing any origin
        # (use your imagination here)
        return []
    return [c.strip() for c in cors.split(',')]
def extract_domain(url):
    '''Extract the domain name from a given URL'''
    return urlparse(url).netloc
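
# The subdomain_takeover module referenced in __main__ below is not part of
# this gist. What follows is a minimal, illustrative sketch of such a check,
# assuming a fingerprint-based approach; the services and fingerprint strings
# are examples, not the author's actual list.
FINGERPRINTS = {
    'github.io': "There isn't a GitHub Pages site here.",
    's3.amazonaws.com': 'NoSuchBucket',
}

def check_takeover(domains):
    '''Return domains whose HTTP responses match a known takeover fingerprint'''
    takeoverable = []
    for domain in set(domains):
        try:
            r = requests.get('http://{}'.format(domain), timeout=GLOBAL_HTTP_TIMEOUT,
                             verify=False, headers=UA)
        except requests.exceptions.RequestException:
            continue
        if any(fp in r.text for fp in FINGERPRINTS.values()):
            takeoverable.append(domain)
    return takeoverable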
if __name__ == '__main__':
    # This is a sample of the intended functionality
    # ----
    # Note that there is missing functionality for showing
    # the origin domain name where a takeover was discovered (if any)
    # ----
    domains = []  # Database retrieval
    results = {}

    for d in domains:
        for prefix in ['http://', 'https://']:
            # Trying both HTTP and HTTPS, where HTTPS has higher priority
            # (thus second in the list)
            r = None
            try:
                r = requests.get('{}{}'.format(prefix, d), timeout=GLOBAL_HTTP_TIMEOUT, verify=False, headers=UA)
            except requests.exceptions.RequestException:
                pass

            if r is None:
                # Connection refused / NXDOMAIN / timeout / ...
                continue

            urls = extract_javascript(d, r.text)
            urls += extract_links(d, r.text)
            urls += extract_styles(d, r.text)
            urls += extract_cors(r.headers)
            # takeoverable = subdomain_takeover.check([extract_domain(u) for u in urls])
            # ...