Skip to content

Instantly share code, notes, and snippets.

Last active December 21, 2022 22:53
What would you like to do?
Based on the initial work of Digininja on CeWL. While CeWL is a script written
in Ruby that requires an independent crawl of a website in order to build a custom wordlist, Whey CeWLer
runs within Portswigger's Burp Suite and parses an already crawled sitemap to build a custom wordlist. It
does not have the meta data parsing capabilities that CeWL does, but it more than makes up for it in convenience.
The name gets its origins from the CeWLer portion of the CO2 Burp extension by Jason Gillam, which is written
in Java and does something similar, but Whey CeWLer is a completely reimagined extension written in Python,
making it "way cooler".
1. Point Burp Suite to Jython in the Extender > Options tab.
2. Install this extension manually in the Extender > Extensions tab.
3. Select an option for extension output (File, Console or UI).
4. Right-click on any element in the Target tab's hierarchical sitemap.
5. Select the Extensions > Create wordlist context menu item.
The wordlist is written to wherever you have the extension configured for output.
from burp import IBurpExtender
from burp import IContextMenuFactory
from javax.swing import JMenuItem
from java.util import ArrayList, List
from HTMLParser import HTMLParser
from datetime import datetime
import re
# Entries that seed the word list before any page is scraped.
COMMON_PASSWORDS = [
    'password',
]
# Content types (lowercase, without parameters) that are scraped for words.
TEXT_CONTENT_TYPES = [
    'text/html',
    'application/xml',
    'application/json',
    'text/plain',
]
# helpful resource
class TagStripper(HTMLParser):
    """
    Attempts to strip all tags from an HTML page received in the HTTP response.

    The remaining text is appended to a list and then joined with " " for
    regex parsing.
    """

    def __init__(self):
        # the base initializer sets up the parser state that feed() relies on
        HTMLParser.__init__(self)
        self.page_text = []

    def handle_data(self, data):
        """Collects a run of text found outside of any tag."""
        self.page_text.append(data)

    def handle_comment(self, data):
        """Collects the text of an HTML comment."""
        # NOTE(review): the body of this handler was missing from the source;
        # treating comment text as word material is assumed -- confirm.
        self.page_text.append(data)

    def strip(self, html_page):
        """
        Parses html_page and returns the collected text joined with " ".

        Text collected by earlier calls on the same instance is kept, so use
        a fresh instance per page.
        """
        self.feed(html_page)
        return " ".join(self.page_text)
class BurpExtender(IBurpExtender, IContextMenuFactory):
    """
    BurpExtender class as per the Burp Extender reference API.

    Offers a "Create Wordlist" context menu item on the Target tab's site map
    tree and builds a word list from the responses stored in the site map
    under the selected entries.
    """

    def registerExtenderCallbacks(self, callbacks):
        """
        Registers the extension and initializes the root URLs and word list sets.
        """
        self._callbacks = callbacks
        self._helpers = callbacks.getHelpers()
        self.context = None
        self.roots = set()
        self.word_list = set(COMMON_PASSWORDS)
        callbacks.setExtensionName("Whey CeWLer")
        # without this registration Burp never calls createMenuItems()
        callbacks.registerContextMenuFactory(self)

    def createMenuItems(self, context):
        """
        Invokes the "Create Wordlist" menu.

        Returns a list holding the menu item when the menu is invoked from
        the site map tree, and None for every other invocation context.
        """
        # remembered so menu_action() can read the selection later
        self.context = context
        if context.getInvocationContext() != context.CONTEXT_TARGET_SITE_MAP_TREE:
            return None
        menu_list = ArrayList()
        menu_list.add(JMenuItem("Create Wordlist", actionPerformed=self.menu_action))
        return menu_list

    def menu_action(self, event):
        """
        Obtains the selected messages from the interface. Filters the sitemap
        for all messages with URLs within the selected messages' hierarchy.
        Every such message that has a response is analyzed to grow the word
        list, and the word list is then displayed.

        Roots and words are kept on the instance, so they accumulate across
        invocations of the menu item.
        """
        # get all first-level selected messages and store the URLs as roots to filter the sitemap
        # (getSelectedMessages() may return null when no message is applicable)
        for http_message in self.context.getSelectedMessages() or []:
            self.roots.add(http_message.getUrl().toString())
        # startswith() accepts a tuple, so an entry matching several roots is scraped only once
        roots = tuple(self.roots)
        # get all sitemap entries associated with the selected messages and scrape them for words
        for http_message in self._callbacks.getSiteMap(None):
            url = http_message.getUrl().toString()
            # will scrape the same URL multiple times if the site map has stored multiple instances
            # the site map stores multiple instances if it detects differences, so this is desirable
            if not url.startswith(roots):
                continue
            # only scrape if there is a response to scrape
            http_response = http_message.getResponse()
            if http_response:
                self.get_words(url, http_response)
        self.display_words()

    def get_words(self, url, http_response):
        """
        Checks the header for a text-based content type. If the content type
        is text-based (or no content type is declared), uses the TagStripper
        class to parse out the text and runs a regex to create a wordlist
        based on the regex criteria. The resulting words are added to the
        word_list set.

        url is accepted for the caller's convenience and is not used here.
        """
        response = self._helpers.analyzeResponse(http_response)
        # the first entry is skipped: it is the status line rather than a header
        for header in response.getHeaders()[1:]:
            # partition() tolerates a malformed line without a colon
            name, _, value = header.partition(':')
            if name.strip().lower() != 'content-type':
                continue
            content_type = value.split(';')[0].strip().lower()
            if content_type not in TEXT_CONTENT_TYPES:
                # not text, nothing to scrape
                return
        body = self._helpers.bytesToString(http_response[response.getBodyOffset():])
        page_text = TagStripper().strip(body)
        # alpha numerics and apostrophes
        # at least 3 characters in length
        for word in re.findall(r"[\w']{3,}", page_text):
            # strip apostrophes
            word = word.replace("'", "")
            # add the word to the list (a run of only apostrophes leaves nothing to add)
            if word:
                self.word_list.add(word)

    def display_words(self):
        """
        Displays the word list to whatever Burp is configured for stdout.
        """
        for word in sorted(self.word_list):
            # a single parenthesized argument prints the same under Jython 2's print statement
            print(word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment