Skip to content

Instantly share code, notes, and snippets.

@baryluk
Last active March 3, 2024 00:38
Show Gist options
  • Save baryluk/09cbabb215351117b32aee994e5619a0 to your computer and use it in GitHub Desktop.
Save baryluk/09cbabb215351117b32aee994e5619a0 to your computer and use it in GitHub Desktop.
Graph binary / library / dynamic library / shared object / so / ELF files, dependencies as a graph
#!/usr/bin/env python3
# Copyright: Witold Baryluk, 2019-2024. MIT license
# This small program takes one parameter, a binary (or library), and outputs
# a dependency graph. This is done recursively for all subdependencies.
# Some common dependencies are ignored, such as the basic glibc libraries.
# The ones related to stdc++ / gcc are not ignored (otherwise the graph would be very dense).
#
# To generate and render dependency graph in one go, use something like this:
#
# ./library_dependencies.py ${BINARY} | dot -Grankdir=LR -Nshape=box -Tpng -o dependencies.png /dev/fd/0
#
# TODO(baryluk): Make it more parallel.
# TODO(baryluk): Use `/lib/ld-linux.so.2 --list`, /lib64/ld-linux-x86-64.so.2 --list or ldd directly somehow?
# TODO(baryluk): Name it something short. ldx? ldv? lld?
# TODO(baryluk): Also traverse LD_LIBRARY_PATH
#
# LD_DEBUG=libs,files /bin/ls 2>&1
# LD_DEBUG=libs,files LD_DEBUG_OUTPUT=output_log.txt /bin/ls
#
# Can be used to capture shared object (.so) info, but it is insecure.
#
# Similarly `ld-linux.so.2 --list` and `ldd` are not secure.
# ldd /bin/ls is basically equivalent to running LD_TRACE_LOADED_OBJECTS=1 ld-linux.so.2 /bin/ls
# In fact ldd is just a bash script that detects the ELF object's architecture and invokes
# the proper dynamic linker with the proper flags and environment variables,
# but the dynamic linker will most likely still invoke the init/fini sections of ELF objects!
import os.path
import re
import subprocess
import sys
# Note, that libstdc++.so.6 and libgcc_s.so.1 are not in this list on purpose!
IGNORED = {
    # Dynamic loaders.
    "ld-linux.so.2",
    "ld-linux-x86-64.so.2",
    # Basic C runtime libraries.
    "libc.so.6",
    "libm.so.6",
    "libdl.so.2",
    "libpthread.so.0",
    "librt.so.1",
}
def configure_paths(ld_conf: str) -> list[str]:
    """Read the entries listed in an ld.so configuration file.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Every other line is returned with surrounding whitespace
    removed, in the order it appears in the file.

    Args:
        ld_conf: Path of the configuration file to read.

    Returns:
        The non-comment, non-empty lines of the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    paths: list[str] = []
    # The context manager closes the file even if reading fails part-way.
    with open(ld_conf) as conf:
        for line in conf:
            # Strip both ends so indented comments and entries are handled.
            entry = line.strip()
            if not entry or entry.startswith("#"):
                continue
            paths.append(entry)
    return paths
def find_binary(binary: str, paths: list[str]) -> "str | None":
    """Locate *binary* on disk, either as given or inside one of *paths*.

    Args:
        binary: A path or bare file name to look for.
        paths: Directories to try, in order, when *binary* does not exist
            as given.

    Returns:
        *binary* itself if that path exists, otherwise the first
        ``{path}/{binary}`` that exists, otherwise ``None``.
    """
    if os.path.exists(binary):
        return binary
    for path in paths:
        full_path = f"{path}/{binary}"
        if os.path.exists(full_path):
            return full_path
    # Make the "not found" result explicit instead of falling off the end;
    # the return annotation is quoted so it is not evaluated at import time.
    return None
def expand(origin, binary, name, values):
    """Split the first RPATH/RUNPATH value into a list of directories.

    Only ``values[0]`` is considered. Occurrences of ``$ORIGIN`` and
    ``${ORIGIN}`` in it are replaced by *origin*; when *origin* is empty,
    the part of *binary* before its last ``/`` is used instead (the empty
    string if *binary* contains no slash). Each substitution is reported
    on stderr.

    Args:
        origin: Replacement text for ``$ORIGIN``; may be empty.
        binary: Path of the object the value was read from.
        name: Label used in the stderr diagnostics (e.g. "RPATH").
        values: Raw values; an empty list yields an empty result.

    Returns:
        The first value, after substitution, split on ``:``.
    """
    if not values:
        return []
    if not origin:
        # rpartition() yields "" as head when there is no slash at all.
        origin = binary.rpartition("/")[0]
    # TODO(baryluk): Support $LIB and $PLATFORM
    tokens = ("$ORIGIN", "${ORIGIN}")
    first = values[0]
    if any(token in first for token in tokens):
        print(f"binary {binary} has {name} with $ORIGIN: {first}", file=sys.stderr)
        for token in tokens:
            first = first.replace(token, origin)
        print(f" after $ORIGIN subsitution: {first}", file=sys.stderr)
    return first.split(":")
def concat(a, b):
    """Join directory *a* and entry *b* into one path string.

    An empty *a* yields *b* unchanged. Otherwise a single ``/`` is inserted
    between the two unless *a* already ends with one.
    """
    if not a:
        return b
    separator = "" if a.endswith("/") else "/"
    return f"{a}{separator}{b}"
# Names/paths that maybe_recurse_dependencies() has already been asked to
# scan, so each object is inspected with objdump at most once.
already_processed = set()


def _dynamic_entries(objdump_lines, tag):
    """Return the values of all `objdump -p` lines whose first field is *tag*."""
    entries = []
    for line in objdump_lines:
        # Compare the first whitespace-separated field rather than a fixed
        # prefix, so the match does not depend on objdump's indentation width.
        fields = line.split(maxsplit=1)
        if len(fields) == 2 and fields[0] == tag:
            entries.append(fields[1].strip())
    return entries


def maybe_recurse_dependencies(origin: str, binary: str, name: str, level: int, paths: list[str], parent_rpaths: "list[str] | None" = None) -> None:
    """Print DOT edges for the dependencies of *binary*, recursively.

    The object is located with find_binary(), inspected with `objdump -p`,
    and every NEEDED entry that is not in IGNORED is printed as an edge
    ``"name" -> "dependency"`` and then scanned the same way. A dependency
    without a slash that is found in an RPATH or RUNPATH directory is also
    printed as a green-filled node. RPATH directories (this object's own,
    or *parent_rpaths* when it has none) are tried before RUNPATH ones.

    Args:
        origin: Fallback value for ``$ORIGIN``, used only when the resolved
            path of *binary* has no directory component.
        binary: Path or bare name of the object to scan.
        name: Node label to use for this object in the graph.
        level: Recursion depth; passed along incremented, otherwise unused.
        paths: Directories handed to find_binary() for bare names.
        parent_rpaths: RPATH directories of the object that requested this
            one, used when this object has no RPATH of its own.

    Raises:
        FileNotFoundError: If *binary* cannot be located.
        subprocess.CalledProcessError: If objdump exits with an error.
    """
    if parent_rpaths is None:
        parent_rpaths = []
    if binary in already_processed:
        return
    already_processed.add(binary)

    resolved = find_binary(binary, paths)
    if not resolved:
        # An explicit exception survives `python -O`, unlike an assert.
        raise FileNotFoundError(f"Could not find {binary} in paths")
    binary = resolved
    # $ORIGIN is the directory containing the object, so prefer the
    # directory of the resolved path over whatever the caller passed.
    origin = os.path.dirname(binary) or origin

    objdump_output = subprocess.run(["objdump", "-p", binary], capture_output=True, check=True, text=True).stdout
    lines = objdump_output.splitlines()
    dependencies: list[str] = _dynamic_entries(lines, "NEEDED")
    # Also known as DT_RUNPATH
    runpath: list[str] = expand(origin, binary, "RUNPATH", _dynamic_entries(lines, "RUNPATH"))
    rpaths: list[str] = expand(origin, binary, "RPATH", _dynamic_entries(lines, "RPATH")) or parent_rpaths

    for dependency in dependencies:
        if dependency in IGNORED:
            continue
        final_dependency = dependency
        if "/" not in dependency:
            # Probe every RPATH directory, then every RUNPATH directory.
            for directory in [*rpaths, *runpath]:
                candidate = concat(directory, dependency)
                if os.path.exists(candidate):
                    final_dependency = candidate
                    print(f' "{dependency}" [style=filled, fillcolor=green];')
                    break
        print(f' "{name}" -> "{dependency}";')
        maybe_recurse_dependencies(os.path.dirname(final_dependency) or ".", final_dependency, dependency, level + 1, paths, rpaths)
def main():
    """Print a DOT dependency graph for the binary named in ``sys.argv[1]``.

    The `file` utility is asked about the binary; if its description
    contains "64-bit" the x86_64 ld.so configuration file is read,
    otherwise the i386 one. The graph is written to stdout.

    Exits with status 2 and a usage message on stderr when no binary is
    given on the command line.
    """
    if len(sys.argv) < 2:
        program = sys.argv[0] if sys.argv else "library_dependencies.py"
        print(f"usage: {program} BINARY", file=sys.stderr)
        sys.exit(2)
    binary: str = sys.argv[1]
    name: str = binary
    # --brief omits the file name from the output, so a path that happens
    # to contain "64-bit" cannot be mistaken for the description.
    description = subprocess.run(["file", "--brief", "--dereference", binary], capture_output=True, check=True, text=True).stdout
    if "64-bit" in description:
        ld_conf = "/etc/ld.so.conf.d/x86_64-linux-gnu.conf"
    else:
        ld_conf = "/etc/ld.so.conf.d/i386-linux-gnu.conf"
    paths: list[str] = configure_paths(ld_conf)
    print("digraph {")
    print(f' "{name}" [style=filled, fillcolor=green];')
    # The origin is the directory containing the binary ("." for a bare name).
    maybe_recurse_dependencies(os.path.dirname(binary) or ".", binary, name, 0, paths, [])
    print("}")


if __name__ == "__main__":
    main()
@baryluk
Copy link
Author

baryluk commented Mar 3, 2024

Added some RPATH support, better $ORIGIN support, and coloring of local dependencies found via rpath/runpath. I cannot guarantee that it works fully correctly (I am sure there are cases where it may even miss some things), but it looks OK-ish.

Example:

dependencies

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment