Skip to content

Instantly share code, notes, and snippets.

@lanmaster53
Last active January 9, 2024 18:49
Show Gist options
  • Star 27 You must be signed in to star a gist
  • Fork 9 You must be signed in to fork a gist
  • Save lanmaster53/a0d3523279f3d1efdfe6d9dfc4da0d4a to your computer and use it in GitHub Desktop.
Save lanmaster53/a0d3523279f3d1efdfe6d9dfc4da0d4a to your computer and use it in GitHub Desktop.
'''
Based on the initial work of Digininja at https://github.com/digininja/CeWL. While CeWL is a script written
in Ruby that requires an independent crawl of a website in order to build a custom wordlist, Whey CeWLer
runs within Portswigger's Burp Suite and parses an already crawled sitemap to build a custom wordlist. It
does not have the metadata parsing capabilities that CeWL has, but it more than makes up for it in
convenience.
The name gets its origins from the CeWLer portion of the CO2 Burp extension by Jason Gillam, which is written
in Java and does something similar, but Whey CeWLer is a completely reimagined extension written in Python,
making it "way cooler".
Usage:
1. Point Burp Suite to Jython in the Extender > Options tab.
2. Install this extension manually in the Extender > Extensions tab.
3. Select an option for extension output (File, Console or UI).
4. Right-click on any element in the Target tab's hierarchical sitemap.
5. Select the Extensions > Create wordlist context menu item.
The wordlist is written to wherever the extension's output is configured to go.
'''
from burp import IBurpExtender
from burp import IContextMenuFactory
from javax.swing import JMenuItem
from java.util import ArrayList, List
from HTMLParser import HTMLParser
from datetime import datetime
import re
# words that seed every generated word list
COMMON_PASSWORDS = [
    'password',
]
# response content types whose bodies are scraped for words
TEXT_CONTENT_TYPES = [
    'text/html',
    'application/xml',
    'application/json',
    'text/plain',
]
# helpful resource
# https://github.com/laconicwolf/burp-extensions/blob/master/GenerateForcedBrowseWordlist.py
class TagStripper(HTMLParser):
    '''
    Attempts to strip all tags from an HTML page received in the HTTP response. The remaining
    text and comment bodies are appended to an array and then joined with " " for regex parsing.
    '''

    def __init__(self):
        HTMLParser.__init__(self)
        # text fragments gathered by the handlers, in the order they were parsed
        self.page_text = []

    def handle_data(self, data):
        '''
        Collects a run of text reported by the parser.
        '''
        self.page_text.append(data)

    def handle_comment(self, data):
        '''
        Collects the body of a comment reported by the parser.
        '''
        self.page_text.append(data)

    def strip(self, html_page):
        '''
        Parses html_page and returns the collected text and comment fragments joined with " ".
        Every call starts from a clean parser state, so text from an earlier call on the same
        instance is never mixed into the result.
        '''
        # start clean: drop parser buffers and any fragments from a previous call
        self.reset()
        self.page_text = []
        self.feed(html_page)
        # feed() may hold back trailing input (e.g. text ending in "&name") while it waits
        # for more data; close() tells the parser the page is complete so it gets flushed
        try:
            self.close()
        except Exception:
            # NOTE(review): the Python 2 HTMLParser that Jython ships appears to raise on a
            # dangling entity at end of input instead of flushing it - confirm. Best effort:
            # keep whatever the parser left unparsed as plain text rather than lose it.
            leftover = getattr(self, 'rawdata', '')
            if leftover:
                self.page_text.append(leftover)
        return " ".join(self.page_text)
class BurpExtender(IBurpExtender, IContextMenuFactory):
    '''
    BurpExtender Class as per Reference API. Adds a "Create Wordlist" context menu item to the
    Target site map tree and builds a word list from the responses stored under the selection.
    '''

    # shortest word kept in the list, measured after apostrophes have been removed
    MIN_WORD_LENGTH = 3

    def registerExtenderCallbacks(self, callbacks):
        '''
        Registers the extension and initializes the root URLs and word list sets.
        '''
        self._callbacks = callbacks
        self._helpers = callbacks.getHelpers()
        # most recent menu invocation; menu_action reads the selection from it
        self.context = None
        self.roots = set()
        self.word_list = set(COMMON_PASSWORDS)
        callbacks.setExtensionName("Whey CeWLer")
        callbacks.registerContextMenuFactory(self)
        return

    def createMenuItems(self, context):
        '''
        Invokes the "Create Wordlist" Menu. Returns a list holding the menu item when the menu
        was opened on the site map tree, and None for every other invocation context.
        '''
        self.context = context
        if context.getInvocationContext() == context.CONTEXT_TARGET_SITE_MAP_TREE:
            menu_list = ArrayList()
            menu_item = JMenuItem("Create Wordlist", actionPerformed=self.menu_action)
            menu_list.add(menu_item)
            return menu_list
        return

    def menu_action(self, event):
        '''
        Obtains the selected messages from the interface. Filters the sitemap for all messages
        whose URL falls within the selected messages' hierarchy, analyzes each of them to
        extend the word list, then displays the word list.
        '''
        # get all first-level selected messages and store the URLs as roots to filter the sitemap
        # NOTE(review): roots and word_list are never cleared, so selections from earlier runs
        # keep contributing to later word lists - confirm this accumulation is intended
        # getSelectedMessages may return nothing at all, so fall back to an empty selection
        for http_message in self.context.getSelectedMessages() or []:
            self.roots.add(str(http_message.getUrl()))
        # startswith accepts a tuple, so each URL is tested against every root in one call
        roots = tuple(self.roots)
        # get all sitemap entries associated with the selected messages and scrape them for words
        for http_message in self._callbacks.getSiteMap(None):
            url = http_message.getUrl().toString()
            # will scrape the same URL multiple times if the site map has stored multiple
            # instances; the site map stores multiple instances if it detects differences, so
            # this is desirable. A single entry is scraped once even if several roots match it.
            if not url.startswith(roots):
                continue
            # only scrape if there is a response to scrape
            http_response = http_message.getResponse()
            if http_response:
                self.get_words(url, http_response)
        self.display_words()
        return

    def get_words(self, url, http_response):
        '''
        Checks the header for a text-based content type. If the content type is text-based (or
        no content type is declared), uses the TagStripper class to parse out the text and runs
        a regex to create a wordlist based on the regex criteria. The resulting words are added
        to the word_list set. The url argument is accepted but not used.
        '''
        response = self._helpers.analyzeResponse(http_response)
        # the first entry is the status line, so only the entries after it are header fields
        for header in response.getHeaders()[1:]:
            # partition tolerates a malformed line with no ':' instead of raising
            name, _, value = header.partition(':')
            if name.strip().lower() != 'content-type':
                continue
            content_type = value.split(';')[0].strip()
            if content_type.lower() not in TEXT_CONTENT_TYPES:
                return
        # decode the body only once the response is known to be worth scraping
        body = self._helpers.bytesToString(http_response[response.getBodyOffset():])
        page_text = TagStripper().strip(body)
        # alpha numerics and apostrophes
        # at least 3 characters in length
        for candidate in re.findall(r"[\w']{3,}", page_text):
            # strip apostrophes
            word = candidate.replace("'", "")
            # stripping can shorten or empty the candidate, so enforce the minimum again
            if len(word) >= self.MIN_WORD_LENGTH:
                self.word_list.add(word)
        return

    def display_words(self):
        '''
        Displays the sorted word list to whatever Burp is configured for stdout.
        '''
        for word in sorted(self.word_list):
            # a single parenthesised value prints the same under the Python 2 print statement
            print(word)
        return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment