Skip to content

Instantly share code, notes, and snippets.

@karanlyons
Last active December 4, 2023 23:07
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karanlyons/21134b716cf55087fec6928ab52f5cb5 to your computer and use it in GitHub Desktop.
Save karanlyons/21134b716cf55087fec6928ab52f5cb5 to your computer and use it in GitHub Desktop.
Find Running Processes Referencing log4j

When run on a box, outputs a single row of JSON for every process on the box that loads a jar/war containing any files with 'log4j' in them, including precisely what triggered the match. For example (pretty-printed here for clarity; note that this one is, happily, a false positive):

{
  "node": "HW0000001",
  "time": 1632617610.3860812,
  "pid": 78676,
  "cmd": "/usr/local/opt/openjdk/libexec/openjdk.jdk/Contents/Home/bin/java",
  "args": [
    "-Xms128M",
    "-Xmx4g",
    "-Dawt.useSystemAAFontSettings=lcd",
    "-Dswing.aatext=true",
    "-XX:+UseG1GC",
    "-classpath",
    "/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-gui-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jfontchooser-1.0.5.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-cli-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-core-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/android-29-clst.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/android-29-res.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/logback-classic-1.2.3.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-smali-input-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-java-convert-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/dx-1.16.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-dex-input-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jadx-plugins-api-1.2.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/slf4j-api-1.7.30.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/baksmali-2.4.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/smali-2.4.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/util-2.4.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jcommander-1.80.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/rsyntaxtextarea-3.1.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/image-viewer-1.2.3.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/gson-2.8.6.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/commons-text-1.9.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/commons-lang3-3.11.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/rxjava2-swing-0.3.7.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/rxjava-2.2.19.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/apksig-4.0.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/logback-core-1.2.3.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/reactive-streams-1.0.3.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/dexlib2-2.4.0.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/guava-29.0-jre.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/asm-8.0.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/failureaccess-1.0.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/listenablefuture-9999.0-empty-to-av
oid-conflict-with-guava.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/jsr305-3.0.2.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/checker-qual-2.11.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/error_prone_annotations-2.3.4.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/j2objc-annotations-1.3.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/antlr-runtime-3.5.2.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/stringtemplate-3.2.1.jar:/usr/local/Cellar/jadx/1.2.0/libexec/lib/antlr-2.7.7.jar",
    "jadx.gui.JadxGUI",
    "/usr/local/Cellar/jadx/1.2.0/libexec/lib/logback-classic-1.2.3.jar"
  ],
  "pkgs": {
    "/usr/local/Cellar/jadx/1.2.0/libexec/lib/logback-classic-1.2.3.jar": {
      "hash": "fb53f8539e7fcb8f093a56e138112056ec1dc809ebb020b59d8a36a5ebac37e0",
      "manifest": "[elided for readme]",
      "matches": {
        "ch/qos/logback/classic/log4j/XMLLayout.class": [
          [21, "properties�<init>�()V�Code�LineNumberTable�LocalVariableTable�this�(Lch/qos/logback/classic/log4j/XMLLayout;�start�setLocationInfo�(Z)V�flag�getLocationInfo�()Z�\rsetProperties�\rgetProperties�doLayout�>(Lch/qos/logback/classic/spi/ILoggingEvent;)Ljava/lang/String;�step�3Lch/qos/logback/classic/spi/StackTraceElementProxy;�arr$�4[Lch/qos/logback/classic/spi/StackTraceElementProxy;�len$�i$�stepArray�immediateCallerData�Ljava/lang/StackTraceElement;�callerDataArray�[Ljava/lang/StackTraceElement;�entry�Entry�InnerClasses�Ljava/util/Map$Entry;�Ljava/util/Iterator;�entrySet�Ljava/util/Set;�propertyMap�Ljava/util/Map;�event�*Lch/qos/logback/classic/spi/ILoggingEvent;�tp�,Lch/qos/logback/classic/spi/IThrowableProxy;�LocalVariableTypeTable�;Ljava/util/Map$Entry<Ljava/lang/String;Ljava/lang/String;>;�LLjava/util/Set<Ljava/util/Map$Entry<Ljava/lang/String;Ljava/lang/String;>;>;�5Ljava/util/Map<Ljava/lang/String;Ljava/lang/String;>;�\rStackMapTable�h�getContentType�()Ljava/lang/String;�&(Ljava/lang/Object;)Ljava/lang/String;�Signature�LLch/qos/logback/core/LayoutBase<Lch/qos/logback/classic/spi/ILoggingEvent;>;�\n"],
          [22, "SourceFile�XMLLayout.java�T�U�I�J�M�J�java/lang/StringBuilder�T�O�P�Q�R�S�R�[�U�<log4j:event logger=\"\"\r\n"],
          [25, "�  <log4j:message>�</log4j:message>\r\n"],
          [26, "�  <log4j:throwable><![CDATA[�\r\n"],
          [27, "�]]></log4j:throwable>\r\n"],
          [28, "�  <log4j:locationInfo class=\"�                      method=\"\" file=\"\" line=\"\"/>\r\n"],
          [29, "�b�`�u�  <log4j:properties>�`�\n"],
          [31, "    <log4j:data� name='�java/lang/String�'� value='�\r� />�\r\n"],
          [32, "  </log4j:properties>�\r\n"],
          [33, "</log4j:event>\r\n"],
          [35, "�text/xml�(ch/qos/logback/classic/spi/ILoggingEvent�c�d�&ch/qos/logback/classic/log4j/XMLLayout�ch/qos/logback/core/LayoutBase�*ch/qos/logback/classic/spi/IThrowableProxy�\rjava/util/Map�\rjava/util/Set�java/util/Iterator�(I)V�capacity�()I�setLength�append�-(Ljava/lang/String;)Ljava/lang/StringBuilder;�\rgetLoggerName�%ch/qos/logback/core/helpers/Transform�\n"]
        ],
        "ch/qos/logback/classic/spi/CallerData.class": [
          [13, "SourceFile�CallerData.java�&�'�q�t�u�v�w�D�E�$�%�java/lang/StackTraceElement�x�y�org.apache.log4j.Category�org.slf4j.Logger�z�{�H�I�r�|�}�s�~�java/lang/String�?�&�java/lang/StringBuilder�?#?:?�w�#�%ch/qos/logback/classic/spi/CallerData�java/lang/Object�java/lang/Throwable�java/util/List�java/util/Iterator�\rgetStackTrace� ()[Ljava/lang/StackTraceElement;�getClassName�()Ljava/lang/String;�equals�(Ljava/lang/Object;)Z�\n"]
        ],
        "META-INF/maven/ch.qos.logback/logback-classic/pom.xml": [
          [53, "      <artifactId>log4j-over-slf4j</artifactId>\r\n"],
          [63, "    <!-- Must be after log4j-over-slf4j:\r\n"],
          [64, "         * we want to use the classes from log4j-over-slf4j (so it must come first);\r\n"],
          [65, "         * we want to use log4j.dtd from log4j. -->\r\n"],
          [67, "      <groupId>log4j</groupId>\r\n"],
          [68, "      <artifactId>log4j</artifactId>\r\n"]
        ]
      }
    }
  }
}

This script may need root privileges in order to access /proc/$pid/fd and /proc/$pid/cmdline for processes not owned by the invoking user. Depending on what is running on the box, it may take a very long time to execute (e.g., lots of processes and open files) or may never terminate (e.g., FUSE mountpoints). Running it under a timeout (e.g., timeout 5m python find_log4j_procs.py) is recommended.

Dependencies:

  • python (2 or 3)
  • A Unix-like environment with /proc/$pid/cmdline
#!/usr/bin/env python
import json
import os
import re
import sys
import traceback
import zipfile
from collections import defaultdict
from contextlib import contextmanager
from hashlib import sha256
from io import BytesIO
from textwrap import dedent
from time import time
from xml.etree import ElementTree # Yes, I know.
# Compatibility shim: when this interpreter's zipfile module has no
# ``BadZipFile``, alias it to ``zipfile.error`` so that later
# ``except zipfile.BadZipFile`` clauses still work.
if not hasattr(zipfile, 'BadZipFile'):
    zipfile.BadZipFile = zipfile.error
@contextmanager
def zipfile_cm(*args, **kwargs):
    """Open a ``zipfile.ZipFile`` as a context manager with a member helper.

    All arguments are forwarded to ``zipfile.ZipFile``. The yielded archive
    gains a ``subfile_cm`` attribute: a context manager whose arguments are
    forwarded to ``ZipFile.open`` and which yields a seekable binary stream
    for that member. The archive is closed when the outer block exits.
    """
    archive = zipfile.ZipFile(*args, **kwargs)

    @contextmanager
    def subfile_cm(*open_args, **open_kwargs):
        member = archive.open(*open_args, **open_kwargs)
        can_seek = hasattr(member, 'seekable') and member.seekable()
        if not can_seek:
            # Member streams that cannot seek are read fully into memory so
            # that callers can still rewind and re-read them.
            buffered = BytesIO(member.read())
            try:
                yield buffered
            finally:
                member.close()
                del buffered
        else:
            try:
                yield member
            finally:
                member.close()

    archive.subfile_cm = subfile_cm
    try:
        yield archive
    finally:
        archive.close()
# Host name of this machine (second field of os.uname()); log.info stamps it
# on every emitted JSON row as "node".
nodename = os.uname()[1]
# PID of this scanner process, used to address its own /proc entries and to
# skip itself while walking /proc.
self_pid = os.getpid()
class log(object):
    """Namespace of static helpers that print one compact JSON row per event.

    Every row carries ``node`` (the module-level ``nodename``) and ``time``
    (the current ``time()``) in addition to the keyword fields supplied by
    the caller. The helpers are declared with ``@staticmethod`` so they can
    be called as ``log.info(...)`` on both Python 2 and Python 3 without
    patching the class after its definition.
    """

    @staticmethod
    def info(**d):
        """Print ``d`` plus the ``node``/``time`` stamps as one JSON line."""
        print(json.dumps(
            dict(
                node=nodename,
                time=time(),
                **d
            ),
            separators=(',', ':'),
        ))

    @staticmethod
    def warn(s, **d):
        """Emit a row whose ``warning`` field is ``s``."""
        log.info(warning=s, **d)

    @staticmethod
    def error(s, **d):
        """Emit a row whose ``error`` field is ``s``."""
        log.info(error=s, **d)

    @staticmethod
    def warn_path_exc(exc):
        """Emit a warning built from an exception's ``strerror``/``filename``."""
        log.warn(exc.strerror, path=exc.filename)

    @staticmethod
    def exc(exc):
        """Emit an error row for ``exc`` together with the current traceback."""
        log.error(str(exc), traceback=traceback.format_exc())
def decode(b):
    """Turn raw bytes into printable text.

    The bytes are decoded as UTF-8 with invalid sequences replaced, and then
    every run of characters other than LF, CR, or printable ASCII
    (0x20-0x7e) is collapsed into a single U+FFFD replacement character.
    """
    text = b.decode('utf-8', 'replace')
    return re.sub(u'[^\u000a\u000d\u0020-\u007e]+', u'\ufffd', text)
# One comma-terminated item of a manifest header value: a name followed by
# any number of ``;key=value`` / ``;key:=value`` parameters. A value may be
# double-quoted so that commas inside the quotes do not end the item.
ITEMS_RE = re.compile(r'[^;,]+(?:;[^:=]+:?=(?:(?:"[^"]*")|(?:[^;,]+)))*(?:,|$)')
# One ``;``/``,``-terminated piece of an item, captured as
# (key, optional text between ':' and '=', value).
SUB_ITEMS_RE = re.compile(
    r'([^;,:=]+)(?:(?::([^=]*))?=((?:"[^"]*")|(?:[^;,]*)))?(?:;|,|$)'
)
def parse_manifest_value(value):
    """Parse one manifest header value into a structured form.

    The value is split into comma-separated items, each made of a name and
    optional ``;key=value`` parameters. Double-quoted parameter values are
    unquoted and split on commas into a list.

    Returns:
        * the bare name (a string) when the value is a single item without
          parameters, e.g. ``'1.0'`` -> ``'1.0'``;
        * otherwise a dict mapping each item name to a dict of its
          parameters (a parameter with a non-empty type part is stored as a
          ``(type, value)`` tuple);
        * an empty dict when nothing in ``value`` matches.
    """
    items = {}
    for item in ITEMS_RE.findall(value):
        sub_items = SUB_ITEMS_RE.findall(item)
        if not sub_items:
            # ITEMS_RE accepts text SUB_ITEMS_RE cannot tokenize (an item
            # made only of ':' / '=' characters). Keep the raw text as the
            # name instead of failing on sub_items[0].
            items[item.rstrip(',')] = {}
            continue
        name = sub_items[0][0]
        params = {}
        for key, attr_type, raw in sub_items[1:]:
            if raw and raw[0] == '"':
                parsed = raw[1:-1].split(',')
            else:
                parsed = raw
            params[key] = (attr_type, parsed) if attr_type else parsed
        items[name] = params
    if len(items) == 1:
        name, params = next(iter(items.items()))
        if not params:
            # A lone item without parameters is just a scalar value.
            return name
    return items
def parse_manifest(s, strip_per_entry_attrs=True):
    """Parse the text of a jar manifest.

    Continuation lines (a line break followed by a space) are joined first.
    Each non-blank line is split at its first ``:`` into a key and a value;
    a line without ``:`` is kept as a key with an empty value rather than
    aborting the parse. A ``Name`` key starts a per-entry section that runs
    until the next blank line.

    Args:
        s: Manifest content as text.
        strip_per_entry_attrs: When true, per-entry sections are discarded.

    Returns:
        With ``strip_per_entry_attrs`` true: a list of
        ``(key, parse_manifest_value(value))`` pairs for the attributes
        outside per-entry sections. Otherwise a tuple of that list and a
        dict mapping each entry ``Name`` to its ``{key: raw value}`` dict.
    """
    entries = []
    for line in re.sub('[\r\n]+ ', '', s.strip()).splitlines():
        if not line.strip():
            # Blank line: section separator.
            entries.append((None, None))
            continue
        parts = line.split(':', 1)
        key = parts[0].strip()
        # Malformed lines with no ':' get an empty value so that unpacking
        # into (key, value) below cannot fail.
        value = parts[1].strip() if len(parts) == 2 else ''
        entries.append((key, value))

    if strip_per_entry_attrs:
        main_attrs = []
        in_entry = False
        for key, value in entries:
            if key is None:
                in_entry = False
            elif key == 'Name' or in_entry:
                in_entry = True
            else:
                main_attrs.append((key, parse_manifest_value(value)))
        return main_attrs

    main_attrs = []
    per_entry = {}
    path = None
    for key, value in entries:
        if key is None:
            path = None
        elif key == 'Name':
            path = value
            per_entry[path] = {}
        elif path:
            per_entry[path][key] = value
        else:
            main_attrs.append((key, parse_manifest_value(value)))
    return (main_attrs, per_entry)
def xml_to_dict(x, p=''):
    """Convert the children of an ElementTree element into nested dicts.

    Args:
        x: Element whose children are converted.
        p: Namespace prefix (``'{uri}'``) to strip from child tags. It is
            only removed from tags that actually start with it, so tags from
            another namespace keep their full qualified name.

    Returns:
        A dict keyed by child tag. Leaf elements contribute their dedented,
        stripped text (``None`` when they have no text); elements with
        children contribute a nested dict. A tag that occurs once with a
        string value maps directly to that string; every other tag maps to
        the list of its values.
    """
    grouped = defaultdict(list)
    for el in x:
        if len(el):
            v = xml_to_dict(el, p)
        elif el.text is not None:
            v = dedent(el.text).strip()
        else:
            v = None
        tag = el.tag
        if p and tag.startswith(p):
            tag = tag[len(p):]
        grouped[tag].append(v)
    return {
        key: (
            values[0]
            if len(values) == 1 and isinstance(values[0], str)
            else values
        )
        for key, values in grouped.items()
    }
def parse_xml(s):
    """Parse an XML document into nested dicts via ``xml_to_dict``.

    If the root tag is namespace-qualified, its ``{uri}`` prefix is passed
    along so it can be stripped from child tags. Any failure is logged and
    ``None`` is returned instead of raising.
    """
    # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
    # constructed XML, and the documents parsed here come out of scanned
    # archives. Kept as-is; flagging for awareness.
    try:
        root = ElementTree.fromstring(s)
        namespace = (root.tag.split('}', 1)[0] + '}') if root.tag[0] == '{' else ''
        return xml_to_dict(root, namespace)
    except Exception as exc:
        log.exc(exc)
        return None
def search_bytes(needle, haystack, context):
    """Return a decoded snippet for every match of ``needle`` in ``haystack``.

    Each snippet starts at the match and runs until ``context`` bytes past
    the end of the match, clamped to the end of ``haystack``.
    """
    limit = len(haystack)
    snippets = []
    for m in re.finditer(needle, haystack):
        # NOTE(review): max(0, ...) cannot change a match start, so no
        # leading context is captured - confirm whether that is intended.
        begin = max(0, m.start())
        stop = min(m.end() + context, limit)
        snippets.append(decode(haystack[begin:stop]))
    return snippets
# Memoized scan_pkg results. Keys combine a package's SHA-256 with the search
# options; values are (max_depth the scan ran with, matches-or-None).
pkg_cache = {}
def scan_pkg(
    file_or_path,
    slow_search,
    fast_search=None,
    report_paths=None,
    all_manifests=True,
    all_poms=True,
    context=100,
    max_depth=3,
):
    """Hash a jar/war and collect everything inside it that matches.

    Args:
        file_or_path: Filesystem path (``str``) or an open, seekable binary
            file object. A path is opened and always closed here; a file
            object stays owned by the caller and is left open.
        slow_search: Bytes regex applied to the content of archive members.
        fast_search: Optional bytes literal; when it does not occur in the
            raw archive bytes the package is rejected without unpacking.
        report_paths: Optional regex; members whose path matches it are
            reported (as ``True``) even when nothing else matched.
        all_manifests: Parse ``META-INF/MANIFEST.MF`` even when it does not
            match ``slow_search``.
        all_poms: Parse ``pom.xml`` members even when they do not match.
        context: Number of bytes kept after each raw match.
        max_depth: How many levels of nested jars/wars may be descended.

    Returns:
        ``(sha256_hex, matches)``. ``matches`` is ``None`` when the fast
        check rejected the package; otherwise it is a dict mapping member
        path to the parsed manifest, the parsed pom, the nested
        ``(hash, matches)`` result, a list of decoded snippets, or ``True``.
        Results are memoized in ``pkg_cache``.

    Raises:
        zipfile.BadZipFile: if the top-level file is not a valid archive.
    """
    opened_here = isinstance(file_or_path, str)
    f = open(file_or_path, 'rb') if opened_here else file_or_path
    try:
        # Hash check
        f.seek(0)
        data = f.read()
        pkg_hash = sha256(data).hexdigest()
        # report_paths is part of the key because it changes which members
        # end up in the result.
        key = (
            pkg_hash, slow_search, fast_search, report_paths,
            all_manifests, all_poms, context,
        )
        if key in pkg_cache:
            depth, result = pkg_cache[key]
            # A cached result is only reusable if it was produced with at
            # least as much recursion budget as requested now.
            if depth >= max_depth:
                return (pkg_hash, result)
        # Fast check
        elif fast_search and fast_search not in data:
            pkg_cache[key] = (max_depth, None)
            return (pkg_hash, None)
        # Do not keep the raw archive bytes alive while recursing.
        del data
        # Unpack and scan
        matches = {}
        with zipfile_cm(f, 'r') as jar_file:
            for subfile_path in jar_file.namelist():
                with jar_file.subfile_cm(subfile_path) as subfile:
                    found = None
                    if subfile_path == 'META-INF/MANIFEST.MF':
                        if (
                            all_manifests
                            or re.search(
                                slow_search,
                                re.sub(b'[\r\n]+ ', b'', subfile.read())
                            )
                        ):
                            subfile.seek(0)
                            try:
                                found = parse_manifest(decode(subfile.read()))
                            except Exception as exc:
                                log.exc(exc)
                    elif subfile_path.endswith('pom.xml'):
                        if all_poms or re.search(slow_search, subfile.read()):
                            subfile.seek(0)
                            try:
                                found = parse_xml(subfile.read())
                            except Exception as exc:
                                log.exc(exc)
                    elif subfile_path.endswith(('.jar', '.war')):
                        if max_depth > 0:
                            subfile.seek(0)
                            try:
                                found = scan_pkg(
                                    subfile,
                                    slow_search,
                                    fast_search,
                                    report_paths,
                                    all_manifests,
                                    all_poms,
                                    context,
                                    max_depth - 1,
                                )
                            except zipfile.BadZipFile as exc:
                                found = False
                                log.error(str(exc), path='>%s' % subfile_path)
                        else:
                            log.warn(
                                'maximum jar recursion depth exceeded',
                                path='>%s' % subfile_path,
                            )
                    if not found:
                        # Nothing structured was produced: fall back to a
                        # raw byte search of the member.
                        subfile.seek(0)
                        try:
                            found = search_bytes(
                                slow_search, subfile.read(), context
                            )
                        except Exception as exc:
                            log.exc(exc)
                    if report_paths is not None and not found:
                        if re.match(report_paths, subfile_path):
                            found = True
                    if found:
                        matches[subfile_path] = found
        pkg_cache[key] = (max_depth, matches)
        return (pkg_hash, matches)
    finally:
        # Close only what this call opened, on every exit path (early
        # return, success, or exception).
        if opened_here:
            f.close()
def can_run():
    """Check that this process can read its own /proc entries.

    Logs an error and returns ``False`` if ``/proc``, this process's
    ``cmdline`` file, or its ``fd`` directory is missing, or if reading
    them fails with an OS-level error; returns ``True`` otherwise.
    """
    cmdline_path = '/proc/%s/cmdline' % self_pid
    fd_dir = '/proc/%s/fd' % self_pid
    for required in ('/proc', cmdline_path, fd_dir):
        if os.path.exists(required):
            continue
        log.error('path does not exist', path=required)
        return False
    try:
        with open(cmdline_path) as f:
            f.read()
        # The last listed descriptor is skipped - presumably the transient
        # one held while listing the directory; TODO confirm.
        for fd in os.listdir(fd_dir)[:-1]:
            os.readlink('/proc/%s/fd/%s' % (self_pid, fd))
    except (OSError, IOError) as exc:
        log.error('no /proc access', path=exc.filename)
        return False
    return True
def run(*args, **kwargs):
    """Walk /proc, scan every open jar/war, and log what was found.

    ``args``/``kwargs`` are forwarded to ``scan_pkg`` after the package
    path. For each process (other than this one) that holds at least one
    ``.jar`` or ``.war`` open, one row is logged with its ``pid``, ``cmd``,
    ``args`` and a ``hashes`` map of package path to SHA-256. If anything
    was scanned, a final ``pkgs`` row maps each hash to its scan result.
    Errors are logged and never propagate.
    """
    pkg_hash_by_path = {}
    pkgs_by_hash = {}
    for pid in os.listdir('/proc/'):
        try:
            if not re.match(r'^\d+$', pid):
                continue
            pid = int(pid)
            if pid == self_pid:
                continue
            out = {
                'pid': pid,
                'cmd': None,
                'args': None,
                'hashes': {}
            }
            loads_jars = False
            try:
                fds = os.listdir('/proc/%s/fd' % pid)
            except (OSError, IOError) as exc:
                log.warn_path_exc(exc)
                continue
            for fd in fds:
                try:
                    path = os.readlink('/proc/%s/fd/%s' % (pid, fd))
                    if path.startswith('/proc/') or not path.endswith(('.jar', '.war')):
                        continue
                    loads_jars = True
                    if path not in pkg_hash_by_path:
                        pkg_hash, result = scan_pkg(path, *args, **kwargs)
                        pkg_hash_by_path[path] = pkg_hash
                        pkgs_by_hash[pkg_hash] = result
                    if pkg_hash_by_path[path]:
                        out['hashes'][path] = pkg_hash_by_path[path]
                except (OSError, IOError) as exc:
                    log.warn_path_exc(exc)
                    continue
                except Exception as exc:
                    log.exc(exc)
            if loads_jars or out['hashes']:
                try:
                    # Read bytes and decode leniently: a command line that
                    # is not valid UTF-8 must not cost us the whole row.
                    with open('/proc/%s/cmdline' % out['pid'], 'rb') as f:
                        raw = f.read()
                    # Drop the terminating NUL only when it is really there,
                    # so an unterminated command line keeps its last byte.
                    if raw.endswith(b'\x00'):
                        raw = raw[:-1]
                    cmdline = raw.decode('utf-8', 'replace').split(u'\x00')
                    out['cmd'], out['args'] = cmdline[0], cmdline[1:]
                except (OSError, IOError) as exc:
                    log.warn_path_exc(exc)
                log.info(**out)
        except Exception as exc:
            log.exc(exc)
    if pkgs_by_hash:
        log.info(pkgs=pkgs_by_hash)
if __name__ == '__main__':
    # Exit with status 1 when /proc is not usable on this box.
    if can_run() is False:
        sys.exit(1)
    # Search member contents for "log4j" and additionally report any member
    # whose path matches the lookup-class pattern.
    scan_options = dict(
        slow_search=b'log4j',
        report_paths=r'.*/lookup/.*Lookup.class',
        context=100,
    )
    run(**scan_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment