Skip to content

Instantly share code, notes, and snippets.

Created December 12, 2018 15:35
  • Star 9 You must be signed in to star a gist
  • Fork 7 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Second-order subdomain takeover
# coding=utf-8
# python3
from urllib.parse import urlparse
import requests
import urllib3
from bs4 import BeautifulSoup
# Disable SSL insecure warnings
# (the requests below are sent with verify=False, which would otherwise emit
# an InsecureRequestWarning for every HTTPS request)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Timeout for all HTTP requests, in seconds.
# NOTE(review): the value is a reasonable default, tune it for your network.
GLOBAL_HTTP_TIMEOUT = 7
# Set User-Agent for "OPSEC"
# Sent as the `headers` argument of every outgoing request so the scanner
# presents a common desktop-browser User-Agent string.
UA = {
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}
def normalize_url(domain, src):
    """(Try to) normalize a URL to its absolute form.

    Args:
        domain: Host name of the page the reference was found on; used as
            the base for relative references.
        src: Raw value of a ``src``/``href`` attribute.

    Returns:
        An absolute URL string. Relative and protocol-relative references
        are always expanded with the ``http`` scheme; absolute ``http``/
        ``https`` URLs are returned as given (after trimming).
    """
    # Surrounding whitespace and trailing slashes carry no information here.
    src = src.strip().rstrip('/')

    # Protocol relative URL (must be tested before the single-slash case)
    if src.startswith('//'):
        return 'http:{}'.format(src)

    # Relative URL with /
    if src.startswith('/'):
        return 'http://{}{}'.format(domain, src)

    # Relative URL with ?
    if src.startswith('?'):
        return 'http://{}/{}'.format(domain, src)

    # Relative URL with ./ (drop the leading dot, keep the slash)
    if src.startswith('./'):
        return 'http://{}{}'.format(domain, src[1:])

    # Absolute URL; schemes are case-insensitive, so "HTTP://" counts too
    if src.lower().startswith(('https://', 'http://')):
        return src

    # Else let's hope it is relative URL
    return 'http://{}/{}'.format(domain, src)
def extract_javascript(domain, source_code):
    """Extract and normalize external javascript files from HTML.

    Args:
        domain: Host name the HTML was fetched from; base for relative URLs.
        source_code: HTML document as text.

    Returns:
        A de-duplicated list (in no particular order) of absolute URLs taken
        from the ``src`` attribute of every ``<script>`` element. Inline
        scripts, which have no ``src``, are skipped.
    """
    tree = BeautifulSoup(source_code, 'html.parser')
    # A set comprehension de-duplicates while collecting.
    scripts = {
        normalize_url(domain, tag.get('src'))
        for tag in tree.find_all('script')
        if tag.get('src')
    }
    return list(scripts)
def extract_links(domain, source_code):
    """Extract and normalize links in HTML file.

    Args:
        domain: Host name the HTML was fetched from; base for relative URLs.
        source_code: HTML document as text.

    Returns:
        A de-duplicated list (in no particular order) of absolute URLs taken
        from the ``href`` attribute of every ``<a>`` element. Anchors without
        an ``href`` are skipped.
    """
    tree = BeautifulSoup(source_code, 'html.parser')
    # A set comprehension de-duplicates while collecting.
    hrefs = {
        normalize_url(domain, tag.get('href'))
        for tag in tree.find_all('a')
        if tag.get('href')
    }
    return list(hrefs)
def extract_styles(domain, source_code):
    """Extract and normalize CSS (and other ``<link>`` targets) in HTML file.

    Every ``<link>`` element with an ``href`` is collected, not only
    ``rel="stylesheet"`` ones, so icons, canonical URLs etc. are included.

    Args:
        domain: Host name the HTML was fetched from; base for relative URLs.
        source_code: HTML document as text.

    Returns:
        A de-duplicated list (in no particular order) of absolute URLs.
    """
    tree = BeautifulSoup(source_code, 'html.parser')
    # A set comprehension de-duplicates while collecting.
    hrefs = {
        normalize_url(domain, tag.get('href'))
        for tag in tree.find_all('link')
        if tag.get('href')
    }
    return list(hrefs)
def extract_cors(headers):
    """Extract origins listed in the ``Access-Control-Allow-Origin`` header.

    Args:
        headers: Mapping of response header names to values (anything with a
            ``.get`` method, e.g. ``requests`` response headers or a dict).

    Returns:
        A list of the comma-separated origins with surrounding whitespace
        removed. Returns an empty list when the header is missing or empty,
        or when it contains the ``*`` wildcard.
    """
    # Most responses carry no CORS header at all; that must not raise.
    value = headers.get('Access-Control-Allow-Origin')
    if not value:
        return []

    cors = [origin.strip() for origin in value.split(',')]
    cors = [origin for origin in cors if origin]

    if '*' in cors:
        # Use your imagination here
        return []

    return cors
def extract_domain(url):
    '''Extracts domain name from given URL.

    Only the host name is returned: credentials (``user:pass@``) and the
    port (``:8080``) that may be part of the network location are dropped,
    and the name is lower-cased. Returns an empty string when the URL has
    no host part.
    '''
    # `.hostname` is None when there is no network location; keep returning
    # a string in that case, as `.netloc` did.
    return urlparse(url).hostname or ''
if __name__ == '__main__':
    # This is sample of intended functionality
    # ----
    # Note that there is a missing functionality for showing
    # origin domain name where takeover was discovered (if any)
    # ----

    domains = []  # Database retrieval
    results = {}

    for d in domains:
        # Start every domain with a clean slate so that a domain which fails
        # on both schemes does not inherit the previous domain's URLs.
        urls = []

        for prefix in ['http://', 'https://']:
            # Trying both HTTP and HTTPS where HTTPS has higher priority
            # (Thus second in the list)
            try:
                r = requests.get('{}{}'.format(prefix, d), timeout=GLOBAL_HTTP_TIMEOUT, verify=False, headers=UA)
            except requests.exceptions.RequestException:
                # Connection refused / NXDOMAIN / ...
                # requests signals these by raising, not by returning None;
                # skip this scheme and keep whatever the other one produced.
                continue

            urls = extract_javascript(d, r.text)
            urls += extract_links(d, r.text)
            urls += extract_styles(d, r.text)
            urls += extract_cors(r.headers)

        # takeoverable = subdomain_takeover.check([extract_domain(u) for u in urls])
        # ...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment