Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# Differential fuzzing of a regex using Atheris
import sys, re
import atheris
from urllib.parse import urlparse
# Our sketchy regex to be tested. It is deliberately NOT corrected here, because
# it is the subject under test: note that it has no end anchor, and that the
# example2 branch uses "\?" (a literal question mark) where the other two
# branches use "?" (an optional subdomain prefix).
# A raw bytes literal keeps the pattern byte-for-byte identical while avoiding
# the invalid-escape-sequence warning that "\." and "\?" trigger in a plain
# b"..." literal.
OurRegex = re.compile(rb"^(((([A-Za-z0-9.-]*\.)?example1\.com)|(([A-Za-z0-9.-]*\.)\?example2\.com)|(([A-Za-z0-9.-]*\.)?example3\.com)))")
# The allow list of domains the regex is trying to validate.
# Every entry must be non-empty: an empty bytes value is a substring of any
# authority, which would make the harness's containment check always succeed
# and prevent it from ever reporting a discrepancy.
Allowlist = [b"example1.com", b"example2.com", b"example3.com"]
# Our Fuzzing Harness
def TestOneInput(data):
    """Differentially check OurRegex against urllib for one fuzz input.

    The first byte of the input selects one Allowlist entry, which is appended
    to the remaining bytes to form the candidate string. Candidates accepted by
    OurRegex are parsed with ``urlparse``; if none of the Allowlist entries
    occurs in the parsed authority, the disagreement is printed and raised.

    Args:
        data: Raw bytes supplied by the fuzzer.

    Raises:
        RuntimeError: If OurRegex accepted the candidate but no Allowlist
            entry is contained in the authority parsed by urllib.
    """
    # Arbitrary, but lets get a minimum of 5 bytes of fuzz data
    if len(data) < 5:
        return

    # We use the first byte as a selector of one of the allowed domains
    # and we append that domain to the rest of the fuzzer test data.
    # Test will look something like this: <FUZZ DATA><allowed domain>
    test = data[1:] + Allowlist[data[0] % len(Allowlist)]

    # If the regex didn't validate it as trusted there is no point in
    # processing it through urllib, just return
    if not OurRegex.match(test):
        return

    # We have a trusted input, lets compare it to urllib.
    # urlparse decodes bytes input as ASCII and rejects some malformed
    # authorities (e.g. bad bracketed hosts); both surface as ValueError, and
    # such inputs are uninteresting here, so return on exception.
    # urllib also requires a scheme, so we give it https
    try:
        UrllibResult = urlparse(b"https://" + test)
    except ValueError:
        return

    # At this point we have results from urllib. If the authority contains at
    # least one of the trusted domains, regex and urllib agree: nothing to report.
    if any(domain in UrllibResult.netloc for domain in Allowlist):
        return

    # If we got this far it means that we have an input deemed trusted by our regex
    # but urllib did not find any of the allowlist domains inside the authority string
    # of the parsed URL, raise an exception to the fuzzer
    print ("\n\n\n\n==================================================================")
    print ("(SEVERE): Found a potential bypass!")
    print ("\n Payload: %s"% (test))
    print ("Urllib Authority: %s\n"% (UrllibResult.netloc))
    print ("Note: When parsing this input with urllib it appears that none ")
    print ("of the allow list domains were found in the authority!")
    print ("==================================================================\n\n")
    raise RuntimeError("Fuzzer found a discrepency")
# Register the harness with Atheris, then start the fuzzing loop. Setup() only
# configures the fuzzer; without the Fuzz() call TestOneInput is never invoked.
atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment