import xml.etree.ElementTree as ET
import urllib
import base64
import math
import sys
import re
# Usage: open Burp, navigate to the proxy history, press Ctrl-A to select all records, then right-click and "Save Items" as an .xml file.
# Then run: python <this_script>.py burprequests.xml
# The output is saved to wordlist.txt
def entropy(string):
    """Return the Shannon entropy of ``string`` in bits per character.

    The probability of each distinct character is its number of occurrences
    divided by the length of the string; the result is
    ``-sum(p * log2(p))`` over those probabilities.

    Args:
        string: The text to measure.

    Returns:
        The entropy as a float. An empty string and a string made of a
        single repeated character both yield ``0.0``.
    """
    length = len(string)
    if not length:
        # No characters means no probabilities to sum.
        return 0.0

    log_two = math.log(2.0)
    total = 0.0
    # dict.fromkeys visits every distinct character exactly once.
    for char in dict.fromkeys(string):
        p = float(string.count(char)) / length
        total += p * math.log(p) / log_two

    # Subtracting from 0.0 (rather than negating) avoids returning -0.0
    # when every term is zero.
    return 0.0 - total
def avgEntropyByChar(en, length):
    """Return the "average" entropy level: ``en`` divided by ``length``.

    Args:
        en: An entropy value.
        length: The number to divide by (presumably the length of the
            string ``en`` was computed from -- confirm with callers).

    Returns:
        ``en / length`` as a float.

    Raises:
        ZeroDivisionError: If ``length`` is zero.
    """
    # float() keeps this a true division even when both arguments are ints
    # on an interpreter where ``/`` floors integers.
    return float(en) / length
# --- Build a wordlist from the XML file named on the command line ----------
#
# NOTE: this section calls urllib.unquote, which exists only on Python 2
# (Python 3 moved it to urllib.parse.unquote and makes b64decode return
# bytes), so the script targets Python 2.

# Separator sets, compiled once. Each character class splits on exactly the
# same single characters as the alternation it replaces.
# Keeps sub-domains and file/dir names containing . - _ intact.
_URL_SPLIT = re.compile(r'[/?&=]')
# Additionally breaks names apart on _ - . +
_NAME_SPLIT = re.compile(r'[/?&=_\-.+]')
# Separators used for the decoded text of child 8 (cookies, headers, POST
# bodies according to the original comment).
_REQUEST_SPLIT = re.compile(r'''[/?&=_\-.+: \n\r"'<>{}\[\]`~!@#$;,()*|]''')
# Same as above plus tab, ^ and backslash; used for the decoded response.
_RESPONSE_SPLIT = re.compile(r'''[/?&=_\-.+: \n\r\t"'<>{}\[\]`~!@#$;,()*^\\|]''')

# Words whose entropy reaches this value are treated as "random strings".
_MAX_ENTROPY = 4.4


def _decode_message(b64_text):
    # Base64-decode an element's text, then undo URL percent-encoding.
    return urllib.unquote(base64.b64decode(b64_text))


if len(sys.argv) < 2:
    sys.exit("usage: python %s <burp-items.xml>" % sys.argv[0])

tree = ET.parse(sys.argv[1])
root = tree.getroot()
wordlist = []
for entry in root:
    # Children are addressed by position: 1, 8 and 12. Presumably these are
    # the url, request and response elements of a Burp "Save Items" export
    # -- TODO confirm against an exported file.
    url_text = entry[1].text
    if url_text is not None:
        # preserve subdomains, file/dir names with . - _
        wordlist += _URL_SPLIT.split(url_text)
        # get subdomain names and break up file names
        wordlist += _NAME_SPLIT.split(url_text)

    # get words from cookies, headers, POST body requests
    request_text = entry[8].text
    if request_text is not None:
        wordlist += _REQUEST_SPLIT.split(_decode_message(request_text))

    # response
    response_text = entry[12].text
    if response_text is not None:
        wordlist += _RESPONSE_SPLIT.split(_decode_message(response_text))

# De-duplicate before filtering.
auxiliaryList = list(set(wordlist))
final = []
# Not used by the code in this section; kept so the module-level name stays
# defined.
avgEntropyByLength = {}
for word in auxiliaryList:
    # Keep purely alphanumeric words and names containing - . or _
    # (the empty string fails every test and is dropped here).
    if word.isalnum() or '-' in word or '.' in word or '_' in word:
        en = entropy(word)
        # remove "random strings" that are high entropy
        if en < _MAX_ENTROPY:
            final.append(word)

# Write the result once, after all words have been filtered.
with open('wordlist.txt', 'w') as f:
    for item in final:
        f.write("%s\n" % item)

# A parenthesised single string prints identically as a print statement
# and as a print() call.
print("wordlist saved to wordlist.txt")
