Skip to content

Instantly share code, notes, and snippets.

@PoroCYon
Last active May 9, 2024 23:55
Show Gist options
  • Save PoroCYon/e3bb296ea1b1800fb813bfb38933df0b to your computer and use it in GitHub Desktop.
Save PoroCYon/e3bb296ea1b1800fb813bfb38933df0b to your computer and use it in GitHub Desktop.

External reference sources for smol: the Shoddy Minsize-Oriented Linker

Further reading

See also

Source code references for smol: the Shoddy Minsize-Oriented Linker

elf.h

link (glibc source)

/* Dynamic-section tag reserved for debugging purposes.
   NOTE(review): presumably the dynamic entry carrying this tag is how a
   `struct r_debug' is located at run time -- confirm against the glibc
   sources before relying on it.  */
#define DT_DEBUG        21              /* For debugging; unspecified */

link.h

struct r_debug:

link (glibc 2.39 source)

/* Record describing the run-time linker's state for a debugger.  It
   carries the protocol version, the head of the chain of loaded objects,
   the address of a linker-internal function a debugger may breakpoint,
   the kind of mapping change currently in progress, and the base
   address the linker itself is loaded at.  */
struct r_debug
  {
    /* Version number for this protocol.  It should be greater than 0.  */
    int r_version;

    struct link_map *r_map;     /* Head of the chain of loaded objects.  */

    /* This is the address of a function internal to the run-time linker,
       that will always be called when the linker begins to map in a
       library or unmap it, and again when the mapping change is complete.
       The debugger can set a breakpoint at this address if it wants to
       notice shared object mapping changes.  */
    ElfW(Addr) r_brk;
    enum
      {
        /* This state value describes the mapping change taking place when
           the `r_brk' address is called.  */
        RT_CONSISTENT,          /* Mapping change is complete.  */
        RT_ADD,                 /* Beginning to add a new object.  */
        RT_DELETE               /* Beginning to remove an object mapping.  */
      } r_state;                /* One of the RT_* values above.  */

    ElfW(Addr) r_ldbase;        /* Base address the linker is loaded at.  */
  };

struct link_map:

link (glibc 2.39 source)

/* Public view of one loaded object: its load offset, file name and
   dynamic section, plus the `l_next'/`l_prev' links that chain it to
   the other loaded objects.  Only the debugger-protocol members are
   declared in this view.  */
struct link_map
  {
    /* These first few members are part of the protocol with the debugger.
       This is the same format used in SVR4.  */

    ElfW(Addr) l_addr;          /* Difference between the address in the ELF
                                   file and the addresses in memory.  */
    char *l_name;               /* Absolute file name object was found in.  */
    ElfW(Dyn) *l_ld;            /* Dynamic section of the shared object.  */
    struct link_map *l_next, *l_prev; /* Chain of loaded objects.  */
  };

glibc's internal link.h

link (glibc 2.39 source)

/* Dynamic-linker-internal view of `struct link_map'.  It starts with the
   same five debugger-protocol members as the public view and then
   continues with members private to the dynamic linker, which (as the
   comment below says) may change without notice.  This excerpt is
   truncated: the real definition has many more trailing members.  */
struct link_map
  {
    /* These first few members are part of the protocol with the debugger.
       This is the same format used in SVR4.  */

    ElfW(Addr) l_addr;          /* Difference between the address in the ELF
                                   file and the addresses in memory.  */
    char *l_name;               /* Absolute file name object was found in.  */
    ElfW(Dyn) *l_ld;            /* Dynamic section of the shared object.  */
    struct link_map *l_next, *l_prev; /* Chain of loaded objects.  */

    /* All following members are internal to the dynamic linker.
       They may change without notice.  */

    /* This is an element which is only ever different from a pointer to
       the very same copy of this type for ld.so when it is used in more
       than one namespace.  */
    struct link_map *l_real;

    /* Number of the namespace this link map belongs to.  */
    Lmid_t l_ns;

    /* NOTE(review): presumably the list of names this object is known
       under -- `struct libname_list' is not visible here; confirm.  */
    struct libname_list *l_libname;
    /* Indexed pointers to dynamic section.
       [0,DT_NUM) are indexed by the processor-independent tags.
       [DT_NUM,DT_NUM+DT_THISPROCNUM) are indexed by the tag minus DT_LOPROC.
       [DT_NUM+DT_THISPROCNUM,DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM) are
       indexed by DT_VERSIONTAGIDX(tagvalue).
       [DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM,
        DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM) are indexed by
       DT_EXTRATAGIDX(tagvalue).
       [DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM,
        DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM) are
       indexed by DT_VALTAGIDX(tagvalue) and
       [DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM,
        DT_NUM+DT_THISPROCNUM+DT_VERSIONTAGNUM+DT_EXTRANUM+DT_VALNUM+DT_ADDRNUM)
       are indexed by DT_ADDRTAGIDX(tagvalue), see <elf.h>.  */

    ElfW(Dyn) *l_info[DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGNUM
                      + DT_EXTRANUM + DT_VALNUM + DT_ADDRNUM];
    const ElfW(Phdr) *l_phdr;   /* Pointer to program header table in core.  */
    ElfW(Addr) l_entry;         /* Entry point location.  */
    ElfW(Half) l_phnum;         /* Number of program header entries.  */
    ElfW(Half) l_ldnum;         /* Number of dynamic segment entries.  */

    /* Array of DT_NEEDED dependencies and their dependencies, in
       dependency order for symbol lookup (with and without
       duplicates).  There is no entry before the dependencies have
       been loaded.  */
    struct r_scope_elem l_searchlist;

    /* We need a special searchlist to process objects marked with
       DT_SYMBOLIC.  */
    struct r_scope_elem l_symbolic_searchlist;

    /* Dependent object that first caused this object to be loaded.  */
    struct link_map *l_loader;

    /* Array with version names.  */
    struct r_found_version *l_versions;
    /* NOTE(review): presumably the element count of `l_versions' --
       confirm.  */
    unsigned int l_nversions;

    /* Symbol hash table.  */
    Elf_Symndx l_nbuckets;
    Elf32_Word l_gnu_bitmask_idxbits;
    Elf32_Word l_gnu_shift;
    const ElfW(Addr) *l_gnu_bitmask;
    /* Anonymous union: the two pointers share the same storage, so only
       one of them is meaningful for a given object.
       NOTE(review): which one applies presumably depends on the hash
       table flavour (the `l_gnu_' prefix suggests a GNU-style table
       versus a classic one) -- confirm.  */
    union
    {
      const Elf32_Word *l_gnu_buckets;
      const Elf_Symndx *l_chain;
    };
    /* Anonymous union, same storage-sharing arrangement as the one
       directly above.  */
    union
    {
      const Elf32_Word *l_gnu_chain_zero;
      const Elf_Symndx *l_buckets;
    };

    /* ... and many many other fields. */
  };

glibc's sysdeps/x86_64/dl-machine.h

link (glibc 2.39 source)

/* Initial entry point code for the dynamic linker.
   The C function `_dl_start' is the real entry point;
   its return value is the user program's entry point.

   Flow of the assembly below, as stated by its own inline comments:
     1. `_start' passes %rsp to `_dl_start' in %rdi and calls it.
     2. `_dl_start_user' saves the returned user entry point in %r12 and
        the current %rsp in %r13.
     3. argc is read from (%rsp), the stack is aligned to 16 bytes, and
        `_dl_init (main_map, argc, argv, env)' is called with
          %rdi = the quadword loaded from `_rtld_local' (_dl_loaded),
          %rsi = argc,
          %rdx = argv, i.e. 8(%r13),
          %rcx = env,  i.e. 16(%r13,%rdx,8) computed while %rdx still
                 holds argc.
     4. The address of `_dl_fini' is placed in %rdx for the user program,
        %rsp is restored from %r13 so it points at argc again, and
        control jumps to the address held in %r12.

   RTLD_START_ENABLE_X86_FEATURES is spliced in between steps 2 and 3;
   its expansion is not visible here.  */
#define RTLD_START asm ("\n\
.text\n\
        .align 16\n\
.globl _start\n\
.globl _dl_start_user\n\
_start:\n\
        movq %rsp, %rdi\n\
        call _dl_start\n\
_dl_start_user:\n\
        # Save the user entry point address in %r12.\n\
        movq %rax, %r12\n\
        # Save %rsp value in %r13.\n\
        movq %rsp, %r13\n\
"\
        RTLD_START_ENABLE_X86_FEATURES \
"\
        # Read the original argument count.\n\
        movq (%rsp), %rdx\n\
        # Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\
        # argc -> rsi\n\
        movq %rdx, %rsi\n\
        # And align stack for the _dl_init call. \n\
        andq $-16, %rsp\n\
        # _dl_loaded -> rdi\n\
        movq _rtld_local(%rip), %rdi\n\
        # env -> rcx\n\
        leaq 16(%r13,%rdx,8), %rcx\n\
        # argv -> rdx\n\
        leaq 8(%r13), %rdx\n\
        # Clear %rbp to mark outermost frame obviously even for constructors.\n\
        xorl %ebp, %ebp\n\
        # Call the function to run the initializers.\n\
        call _dl_init\n\
        # Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\
        leaq _dl_fini(%rip), %rdx\n\
        # And make sure %rsp points to argc stored on the stack.\n\
        movq %r13, %rsp\n\
        # Jump to the user's entry point.\n\
        jmp *%r12\n\
.previous\n\
");

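Note how it calls _dl_init with a pointer to the link_map in rdi, stemming from _rtld_local. The call _dl_init instruction leaves a pointer to the subsequent leaq instruction (its return address) on the stack, and none of the instructions that follow the call write to the stack, so that value is still sitting just below the restored stack pointer when control reaches the "user's entry point". This means that, at the user's entry point, we can read that return address, step back from it to the movq _rtld_local(%rip), %rdi instruction, and from there calculate the absolute address of _rtld_local._dl_ns[0]._ns_loaded, which is the link_map we need.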

_rtld_local is a struct of type rtld_global (yeah, the naming is confusing) which is defined here (glibc 2.39 source).

The final assembly code snippet that recovers the link_map is thus (source):

    ; Recover ld.so's main link_map pointer at the user's entry point.
    ;
    ; The return address pushed by ld.so's `call _dl_init` is still stored
    ; just below rsp; it points at the `leaq _dl_fini(%rip), %rdx` that
    ; follows the call.
    ; NOTE(review): this relies on ld.so's `andq $-16, %rsp` not having
    ; moved rsp, i.e. on the entry rsp already being 16-byte aligned --
    ; confirm for the target environment.
    mov r12, [rsp -  8]
    ; Load the 32-bit RIP-relative displacement encoded at the end of
    ; `movq _rtld_local(%rip), %rdi`.
    ; NOTE(review): the -20 assumes the four instructions between that
    ; movq and the return address (leaq, leaq, xorl, call) occupy
    ; 5+4+2+5 = 16 bytes, so the displacement sits at [r12-20, r12-16) --
    ; verify against the ld.so build in use.
    mov ebx, dword [r12 - 20]
    ; r12 - 16 is the address of the instruction after that movq, which is
    ; the RIP value the displacement is relative to.  Adding both gives the
    ; address of _rtld_local; the quadword stored there is the link_map
    ; pointer that ld.so itself passed to _dl_init in rdi.
    mov r12, [r12 + rbx - 16]

glibc's elf/dl-runtime.c

link (glibc 2.39 source)

This is the code of the _dl_fixup function. It is used to 1) resolve a reference to an external symbol, 2) write its address back into the GOT, and 3) call the resolved function. This can be used to bootstrap finding dlsym, which can then be used to resolve all other symbols.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment