Skip to content

Instantly share code, notes, and snippets.

@baryluk
Last active March 3, 2024 00:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save baryluk/09cbabb215351117b32aee994e5619a0 to your computer and use it in GitHub Desktop.
Save baryluk/09cbabb215351117b32aee994e5619a0 to your computer and use it in GitHub Desktop.
Graph binary / library / dynamic library / shared object / so / ELF files, dependencies as a graph
#!/usr/bin/env python3
# Copyright: Witold Baryluk, 2019-2024. MIT license
# This small program takes one parameter, a binary (or library), and outputs
# a dependency graph. This is done recursively for all subdependencies.
# Some common dependencies are ignored, such as those on the basic glibc libraries.
# The ones related to stdc++ / gcc are not ignored (otherwise the graph would be very dense).
#
# To generate and render dependency graph in one go, use something like this:
#
# ./library_dependencies.py ${BINARY} | dot -Grankdir=LR -Nshape=box -Tpng -o dependencies.png /dev/fd/0
#
# TODO(baryluk): Make it more parallel.
# TODO(baryluk): Use `/lib/ld-linux.so.2 --list`, /lib64/ld-linux-x86-64.so.2 --list or ldd directly somehow?
# TODO(baryluk): Name it something short. ldx? ldv? lld?
# TODO(baryluk): Also traverse LD_LIBRARY_PATH
#
# LD_DEBUG=libs,files /bin/ls 2>&1
# LD_DEBUG=libs,files LD_DEBUG_OUTPUT=output_log.txt /bin/ls
#
# This can be used to capture shared object (so) info, but it is insecure.
#
# Similarly `ld-linux.so.2 --list` and `ldd` are not secure.
# ldd /bin/ls is basically equivalent to running LD_TRACE_LOADED_OBJECTS=1 ld-linux.so.2 /bin/ls
# In fact ldd is just a bash script that detects elf object architecture and invokes
# proper dynamic linker with proper flags and environment variables.
# but dynamic linker will most likely still invoke init/fini sections of elf objects!
import os.path
import re
import subprocess
import sys
from typing import Optional
# Library names whose dependency edges are left out of the graph.
# libstdc++.so.6 and libgcc_s.so.1 are deliberately NOT part of this set.
IGNORED = {
    # Dynamic loaders.
    "ld-linux-x86-64.so.2",
    "ld-linux.so.2",
    # Basic glibc libraries.
    "libc.so.6",
    "libdl.so.2",
    "libm.so.6",
    "libpthread.so.0",
    "librt.so.1",
}
def configure_paths(ld_conf: str) -> list[str]:
    """Read library search directories from an ld.so configuration file.

    Args:
        ld_conf: Path of the configuration file to read.

    Returns:
        Every line of the file that is neither blank nor a ``#`` comment,
        in file order, with surrounding whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    paths: list[str] = []
    # The context manager closes the file as soon as it has been read,
    # instead of leaving that to the garbage collector.
    with open(ld_conf) as conf:
        for line in conf:
            # strip() rather than rstrip(): an indented comment must still be
            # recognised as a comment, and an indented directory must not
            # keep its leading blanks.
            entry = line.strip()
            if not entry or entry.startswith("#"):
                continue
            paths.append(entry)
    return paths
def find_binary(binary: str, paths: list[str]) -> Optional[str]:
    """Locate *binary* on disk.

    Args:
        binary: A path, or a bare file name to look up in *paths*.
        paths: Directories to search, in priority order.

    Returns:
        *binary* unchanged when it exists as given; otherwise the first
        ``{path}/{binary}`` that exists; ``None`` when nothing matches.
    """
    # A name that already resolves (absolute, or relative to the current
    # working directory) wins over the search directories.
    if os.path.exists(binary):
        return binary
    for path in paths:
        full_path = f"{path}/{binary}"
        if os.path.exists(full_path):
            return full_path
    # Explicit "not found" result; callers must check for it.
    return None
def expand(origin: str, binary: str, name: str, values: list[str]) -> list[str]:
    """Expand an RPATH/RUNPATH value into a list of directories.

    Args:
        origin: Directory to substitute for ``$ORIGIN`` / ``${ORIGIN}``.
            When empty, the directory part of *binary* is used instead.
        binary: Path of the object the value was read from.
        name: Label of the entry ("RPATH" or "RUNPATH"); only used in the
            diagnostic messages written to stderr.
        values: Raw values of the entry; only the first one is used.

    Returns:
        The first value with ``$ORIGIN`` substituted, split on ``:``.
        An empty list when *values* is empty.
    """
    if not values:
        return []
    if not origin:
        # A bare file name has no directory part; it lives in the current
        # directory. Falling back to "" here would turn "$ORIGIN/lib" into
        # the absolute path "/lib".
        origin = binary.rsplit("/", 1)[0] if "/" in binary else "."
    # TODO(baryluk): Support $LIB and $PLATFORM
    value = values[0]
    if "$ORIGIN" in value or "${ORIGIN}" in value:
        print(f"binary {binary} has {name} with $ORIGIN: {value}", file=sys.stderr)
        value = value.replace("$ORIGIN", origin).replace("${ORIGIN}", origin)
        print(f" after $ORIGIN subsitution: {value}", file=sys.stderr)
    return value.split(":")
def concat(a, b):
    """Join directory *a* and entry *b* without doubling the slash.

    Returns *b* alone when *a* is empty, ``a + b`` when *a* already ends
    with ``/``, and ``a/b`` otherwise.
    """
    has_trailing_slash = a.endswith("/")
    if has_trailing_slash or a:
        joiner = "" if has_trailing_slash else "/"
        return f"{a}{joiner}{b}"
    return b
# Names (as requested, before path resolution) that were already visited.
# Shared by all calls so that every object is examined only once per run.
already_processed = set()


def _dynamic_entries(objdump_output: str, tag: str) -> list[str]:
    """Return the value of every line of *objdump_output* whose first field is *tag*."""
    values: list[str] = []
    for line in objdump_output.splitlines():
        # Compare whole fields instead of a fixed prefix, so the match does
        # not depend on how many blanks objdump puts around the tag.
        fields = line.split(maxsplit=1)
        if len(fields) == 2 and fields[0] == tag:
            values.append(fields[1])
    return values


def maybe_recurse_dependencies(
    origin: str,
    binary: str,
    name: str,
    level: int,
    paths: list[str],
    parent_rpaths: Optional[list[str]] = None,
) -> None:
    """Print the dependency edges of *binary* and recurse into each dependency.

    Runs ``objdump -p`` on the object, reads its NEEDED, RPATH and RUNPATH
    entries and prints one ``"name" -> "dependency";`` line per dependency
    that is not in ``IGNORED``. Dependencies located through RPATH/RUNPATH
    are additionally printed as green-filled nodes.

    Args:
        origin: Directory substituted for ``$ORIGIN`` in RPATH/RUNPATH.
        binary: Path or bare name of the object to inspect.
        name: Node label used for this object in the graph output.
        level: Recursion depth; passed along but not otherwise used.
        paths: Directories searched by ``find_binary`` for bare names.
        parent_rpaths: RPATH directories of the parent object, used when
            this object has no RPATH of its own.

    Raises:
        FileNotFoundError: If *binary* cannot be located.
        subprocess.CalledProcessError: If ``objdump`` exits with an error.
    """
    if binary in already_processed:
        return
    already_processed.add(binary)

    resolved = find_binary(binary, paths)
    if not resolved:
        # A real exception instead of `assert`, which disappears under -O.
        raise FileNotFoundError(f"Could not find {binary} in paths")
    if resolved != binary:
        # The object was located through the search paths, so $ORIGIN must
        # refer to the directory it was actually found in.
        origin = resolved.rsplit("/", 1)[0]
    binary = resolved

    o = subprocess.run(["objdump", "-p", binary], capture_output=True, check=True, text=True).stdout
    dependencies = _dynamic_entries(o, "NEEDED")
    # Also known as DT_RUNPATH
    runpath = expand(origin, binary, "RUNPATH", _dynamic_entries(o, "RUNPATH"))
    rpaths = expand(origin, binary, "RPATH", _dynamic_entries(o, "RPATH")) or (parent_rpaths or [])

    for dependency in dependencies:
        if dependency in IGNORED:
            continue
        final_dependency = dependency
        found = False
        if "/" not in dependency:
            # RPATH directories are tried first, then every RUNPATH
            # directory (not only the first one).
            for directory in [*rpaths, *runpath]:
                candidate = concat(directory, dependency)
                if os.path.exists(candidate):
                    final_dependency = candidate
                    found = True
                    break
        if found:
            print(f' "{dependency}" [style=filled, fillcolor=green];')
        print(f' "{name}" -> "{dependency}";')
        # $ORIGIN of the child is the directory that contains it, i.e.
        # everything before the LAST slash.
        child_origin = final_dependency.rsplit("/", 1)[0] if "/" in final_dependency else "."
        maybe_recurse_dependencies(child_origin, final_dependency, dependency, level + 1, paths, rpaths)
def main():
    """Print the dependency graph of the binary named on the command line.

    The output is a Graphviz ``digraph`` written to stdout. The library
    search directories are read from the x86_64 or the i386 ld.so
    configuration file, depending on whether ``file`` reports the binary
    as "64-bit".
    """
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} BINARY")
    binary: str = sys.argv[1]
    name: str = binary

    file_output = subprocess.run(
        ["file", "--dereference", binary], capture_output=True, check=True, text=True
    ).stdout
    if "64-bit" in file_output:
        ld_conf = "/etc/ld.so.conf.d/x86_64-linux-gnu.conf"
    else:
        ld_conf = "/etc/ld.so.conf.d/i386-linux-gnu.conf"
    paths: list[str] = configure_paths(ld_conf)

    # $ORIGIN is the directory containing the binary: everything before the
    # LAST slash, or the current directory for a bare file name.
    origin = binary.rsplit("/", 1)[0] if "/" in binary else "."

    print("digraph {")
    print(f' "{name}" [style=filled, fillcolor=green];')
    maybe_recurse_dependencies(origin, binary, name, 0, paths, [])
    print("}")


if __name__ == "__main__":
    main()
@baryluk
Copy link
Author

baryluk commented Oct 15, 2022

Example output:

mc2

@baryluk
Copy link
Author

baryluk commented Oct 15, 2022

Here is a helper script which might also be useful for determining the minimum required version of each library. Unfortunately, due to ldd issues, it doesn't show all dependency links as properly as the script above does, but the two are useful in combination:

#!/usr/bin/env python3

import subprocess
import sys

import natsort

def _parse_version_information(lines):
    """Collect the symbol versions listed after "Version information:".

    Returns a dict mapping each requiring object to a dict that maps the
    providing library path to the set of version names required from it.
    The result is empty when the section is absent.
    """
    deps = {}
    stripped = [line.strip() for line in lines]
    try:
        start = stripped.index("Version information:") + 1
    except ValueError:
        # No such section (or no output at all): nothing to report.
        return deps

    fullname1 = None
    for line in stripped[start:]:
        if not line:
            continue
        if line.endswith(":"):
            # Header line naming the object the following entries belong to.
            fullname1 = line[:-1].strip()
            continue
        name2withversion, arrow, fullname2 = line.partition(" => ")
        _, paren, rest = name2withversion.rpartition(" (")
        if fullname1 is None or not arrow or not paren:
            # Not of the form "name (VERSION) => path"; skip it rather than
            # crash on the unpacking.
            continue
        version = rest.rsplit(")", 1)[0]
        deps.setdefault(fullname1, {}).setdefault(fullname2, set()).add(version)
    return deps


def main():
    """Print a graph of the highest symbol version required per dependency.

    Runs ``ldd -v`` on the binary named on the command line and writes a
    Graphviz ``digraph`` to stdout in which every edge is labelled with the
    naturally-sorted last version required from the target library.
    """
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} BINARY")
    r = subprocess.run(["ldd", "-v", sys.argv[1]], check=True, universal_newlines=True, stdout=subprocess.PIPE)

    deps = _parse_version_information(r.stdout.splitlines())

    print("digraph {")
    for fullname1, d in deps.items():
        fullname1 = fullname1.removeprefix("/lib/x86_64-linux-gnu/")
        for fullname2, versions in d.items():
            # Edges to the dynamic loader are left out.
            if fullname2 == "/lib64/ld-linux-x86-64.so.2":
                continue
            fullname2 = fullname2.removeprefix("/lib/x86_64-linux-gnu/")
            max_version = natsort.natsorted(versions)[-1]
            print(f"  \"{fullname1}\" -> \"{fullname2}\" [label=\"{max_version}\"];")
    print("}")


if __name__ == "__main__":
    main()

Example output:

mc

@baryluk
Copy link
Author

baryluk commented Oct 15, 2022

Few more examples:

octave
perf
pluma

@baryluk
Copy link
Author

baryluk commented Feb 8, 2024

Added support for $ORIGIN directives in RUNPATH; this makes more binaries and libraries work with the script. E.g. some binaries and libraries in AMD ROCm packages rely on this so that they can be relocated as long as relative paths are maintained.

dependencies

@baryluk
Copy link
Author

baryluk commented Mar 3, 2024

Added some RPATH support, better $ORIGIN support, and coloring of local dependencies found via rpath/runpath. I cannot guarantee that it works fully correctly (I am sure there are cases where it will miss some things), but it looks OK-ish.

Example:

dependencies

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment