@TweekFawkes · Created April 19, 2018 16:28
LolrusLove - Spider for Bucket Enumeration (AWS S3 Bucket, Azure Blob Storage, DigitalOcean Spaces, etc...) - Alpha v0.0.6
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
from scrapy.crawler import CrawlerProcess
#import boto3
import os
from argparse import ArgumentParser
import requests
import time
import logging
from urlparse import urlparse
import dns.resolver  # dnspython; used for the CNAME lookups below
import sys
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
__author__ = '@TweekFawkes'
__website__ = 'Stage2Sec.com'
__blog__ = 'https://Stage2Sec.com/blog/'
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
'''
--- LolrusLove - Spider for Bucket Enumeration (AWS S3 Bucket, Azure Blob Storage, DigitalOcean Spaces, etc...) - Alpha v0.0.6 ---
Spiders a website looking for links to FQDNs used by S3-like bucket services (AWS, Azure, DigitalOcean).
Useful for bucket hijacking and takeovers.
v0.0.6 - Added DNS lookups for discovered FQDNs to see if they are alias records for S3-style buckets (e.g. CNAME checks)
TODO:
- Automatically check via the API whether the AWS S3 bucket is already registered (e.g. hijacking); a hedged sketch is included below the User Defined Variables section
--- Example Usage ---
... look at the help ...
$ lolrusLove-v0_0_6.py -h
... set your AWS API keys so LolrusLove can check if the S3 bucket is already registered ...
$ export AWS_ACCESS_KEY_ID="REDACTED"
$ export AWS_SECRET_ACCESS_KEY="REDACTED"
$ printenv | grep "AWS_"
... Python 2.7.6 (default, Oct 26 2016, 20:30:19) ...
$ python lolrusLove-v0_0_6.py https://example.com/
... you can ignore screen output for the most part ...
$ head 20171127_142706-bucket_urls.txt
... you should see a list of urls with the complete bucket names found via the spider ...
# 20171127_142706 [+] START URL: https://example.com/
https://example.blob.core.windows.net/python-example/example.7z
...
--- Setup on Ubuntu 14.04 x64 ---
apt-get update
apt-get -y install python
apt-get -y install python-pip
pip install boto3
pip install -U pip setuptools
pip install pyOpenSSL
pip install scrapy
pip install enum34
pip install cryptography
apt-get -y install python-requests
pip install dnspython # needed for the dns.resolver CNAME checks
...
$ python
Python 2.7.6 (default, Oct 26 2016, 20:30:19)
...
>>> exit()
--- References ---
- Example: Setting up a Static Website Using a Custom Domain
-- http://docs.aws.amazon.com/AmazonS3/latest/dev/website-hosting-custom-domain-walkthrough.html
- Scraping a domain for links recursively using Scrapy
-- https://stackoverflow.com/questions/46741211/scraping-a-domain-for-links-recursively-using-scrapy
'''
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
# User Defined Variables
#os.environ["AWS_ACCESS_KEY_ID"] = 'REDACTED' # INSECURE # It is better to set these as environment variables using the export command, for example: export AWS_ACCESS_KEY_ID="REDACTED"
#os.environ["AWS_SECRET_ACCESS_KEY"] = 'REDACTED' # INSECURE # It is better to set these as environment variables using the export command, for example: export AWS_SECRET_ACCESS_KEY="REDACTED"
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
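# TODO sketch (not called by the spider yet): check whether an S3 bucket name is already
# registered, as mentioned in the docstring above. This is a minimal sketch using boto3's
# head_bucket call and assumes AWS credentials are available via the environment variables
# described above; the function name checkS3BucketExists is an illustration, not part of
# the original workflow.
def checkS3BucketExists(sBucketName):
    import boto3
    import botocore.exceptions
    client = boto3.client('s3')
    try:
        client.head_bucket(Bucket=sBucketName)
        return True  # bucket exists and is reachable with these credentials
    except botocore.exceptions.ClientError as e:
        if int(e.response['Error']['Code']) == 404:
            return False  # bucket does not appear to be registered (potential takeover candidate)
        return True  # e.g. 403: bucket exists but is owned by someone else / access denied
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #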
# Automatically Defined Variables
sScriptName = os.path.basename(__file__)
sLogDirectory = 'logs'
sVersion = 'Alpha v0.0.6'
sUserAgentString = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
lKeywords = ['windows.net', 'amazonaws.com', 'digitaloceanspaces.com']
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
# Get the Arguments
parser = ArgumentParser(add_help=True)
parser.add_argument('url',
                    action="store",
                    help="[required] url e.g.: https://marketing.example.com/developer")
parser.add_argument("-v", "--verbose",
                    action="store_true", dest="fVerbose", default=False,
                    help="print verbose status messages to stdout")
parser.add_argument("-d", "--debug",
                    action="store_true", dest="fDebug", default=False,
                    help="save debug status messages to a file")
args = parser.parse_args()
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
# Start Logging
if not os.path.exists(sLogDirectory):
    os.makedirs(sLogDirectory)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
# Logging to a file
if args.fDebug is True:
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=sLogDirectory+"/"+str(int(time.time()))+'-'+sScriptName+'.log',
                        filemode='w')
elif args.fDebug is False:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=sLogDirectory+"/"+str(int(time.time()))+'-'+sScriptName+'.log',
                        filemode='w')
logger = logging.getLogger(__name__)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
'''
CRITICAL 50
ERROR 40
WARNING 30
INFO 20
DEBUG 10
NOTSET 0
logging.debug("DEBUG")
logging.info("INFO")
logging.warning("WARNING")
logging.error("ERROR")
logging.critical("CRITICAL")
'''
# Logging to a stdout
console = logging.StreamHandler()
if args.fVerbose is True:
    console.setLevel(logging.DEBUG)
    fSpiderLoggingFlag = True
elif args.fVerbose is False:
    console.setLevel(logging.CRITICAL)
    fSpiderLoggingFlag = False
#console.setLevel(logging.WARNING) # this is what it was
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
logging.critical("""
888 888 888
888 888 888
888 888 888
888 .d88b. 888 888d888 888 888 .d8888b 888 .d88b. 888 888 .d88b.
888 d88""88b 888 888P" 888 888 88K 888 d88""88b 888 888 d8P Y8b
888 888 888 888 888 888 888 "Y8888b. 888 888 888 Y88 88P 88888888
888 Y88..88P 888 888 Y88b 888 X88 888 Y88..88P Y8bd8P Y8b.
88888888 "Y88P" 888 888 "Y88888 88888P' 88888888 "Y88P" Y88P "Y8888
""")
logging.critical("Alpha v0.0.6\n")
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
sStartUrl = str(args.url).strip()
logging.critical("[+] sStartUrl: "+sStartUrl)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
def getTheDomainFromTheUrl(sUrl):
    parsed_uri = urlparse(sUrl)
    sAllowedDomain = parsed_uri.netloc
    return sAllowedDomain
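# e.g. getTheDomainFromTheUrl("https://marketing.example.com/developer") returns "marketing.example.com"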
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
sAllowedDomain = getTheDomainFromTheUrl( sStartUrl )
logging.critical("[+] sAllowedDomain: "+sAllowedDomain)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
from time import gmtime, strftime
sCurrentTime = strftime("%Y%m%d_%H%M%S", gmtime())
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
sOutputFileName = sCurrentTime + '-bucket_urls.txt'
f = open(sOutputFileName, 'w')
f.write('# ' + sCurrentTime + ' [+] START URL: ' + sStartUrl + "\n")
f.close()
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
def logTheUrl(sUrl):
    f = open(sOutputFileName, 'a')
    f.write(sUrl + "\n")
    f.close()
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
def uniqList(lUniqDomains):
    setUniqDomains = set(lUniqDomains)
    return list(setUniqDomains)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
lUniqDomains = []
lUniqDomains.append( sAllowedDomain )
lUniqDomains = uniqList(lUniqDomains)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
logging.critical("[+] lKeywords: " + str(lKeywords))
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
class UrlsSpider(scrapy.Spider):
    name = 'lolrusLove'
    allowed_domains = [sAllowedDomain]
    start_urls = [sStartUrl]
    # Note: rules only take effect on CrawlSpider subclasses; kept here as a proper tuple for reference
    rules = (Rule(LxmlLinkExtractor(allow=(), unique=True), callback='parse', follow=True),)

    def parse(self, response):
        # Off-domain links: check them for bucket-service keywords (windows.net, amazonaws.com, etc.)
        for link in LxmlLinkExtractor(deny_domains=self.allowed_domains, unique=True).extract_links(response):
            sUrl = str(link.url)
            logging.critical("[+] sUrl: " + sUrl)
            for sKeyword in lKeywords:
                if sKeyword in sUrl:
                    logging.critical("[+] link.url: " + link.url)
                    logTheUrl(link.url)
                    try:
                        parsed = urlparse(sUrl)
                        print parsed.hostname
                        try:
                            # Look up CNAME records for the bucket FQDN (alias / takeover check)
                            answers = dns.resolver.query(parsed.hostname, 'CNAME')
                            print ' query qname:', answers.qname, ' num ans.', len(answers)
                            for rdata in answers:
                                print ' cname target address:', rdata.target
                        except Exception:
                            print "002 - Unexpected error:", sys.exc_info()[0]
                    except Exception:
                        print "001 - Unexpected error:", sys.exc_info()[0]
        # On-domain links: keep crawling within the allowed domain
        for link in LxmlLinkExtractor(allow_domains=self.allowed_domains, unique=True).extract_links(response):
            yield scrapy.Request(link.url, callback=self.parse)
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #
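# Optional: a sketch of extra Scrapy settings that could be passed alongside USER_AGENT below.
# DOWNLOAD_DELAY, DEPTH_LIMIT, and ROBOTSTXT_OBEY are standard Scrapy settings; the values
# shown are illustrative assumptions, not part of the original script.
#
# process = CrawlerProcess({
#     'USER_AGENT': sUserAgentString,
#     'DOWNLOAD_DELAY': 0.5,    # throttle requests to be polite to the target site
#     'DEPTH_LIMIT': 5,         # cap how deep the crawl follows links
#     'ROBOTSTXT_OBEY': False,  # set True to honor robots.txt
# })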
process = CrawlerProcess({
'USER_AGENT': sUserAgentString
})
process.crawl(UrlsSpider)
process.start() # the script will block here until the crawling is finished
# --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- # --- #