Skip to content

Instantly share code, notes, and snippets.

@josharian
Last active Aug 13, 2021
Embed
What would you like to do?
calculate dirty data pages for macho binary
// Command iossize predicts the impact
// on memory usage of changes to the binary.
//
// This implementation uses dyldinfo to determine
// exactly how much memory the dynamic linker will dirty
// as part of launching the executable.
//
// It is not perfect. There are multiple sources of dirty pages:
//
// 1. dynamic loader rebase/relocs due to our code
// 2. dynamic loader rebase/relocs due to dyld itself
// and other linked frameworks
// 3. writes to global bss variables, including by
// Go init functions
// 4. persistent allocations by the Go runtime
// 5. GC-able allocations by regular Go code
//
// (1) and (2) can (in theory) be measured statically.
// (3), (4), and (5) must be measured at run time.
//
// iossize measures (1).
// (2) is generally minimal.
// There is no good tooling I am aware of to measure (3) or (4).
// (5) can be measured using pprof.
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"log"
"os/exec"
"strconv"
"strings"
)
// main runs "xcrun dyldinfo -rebase -bind" on the binary named by the single
// command-line argument, records which memory pages the listed rebase and
// bind entries write to, and prints the total size of those pages in bytes.
//
// It exits with a fatal log message if the argument count is wrong, if the
// dyldinfo command fails, if an entry line has no address field, or if the
// command output cannot be scanned to the end.
func main() {
	flag.Parse()
	if flag.NArg() != 1 {
		log.Fatalf("usage: iossize <binary>")
	}
	cmd := exec.Command("xcrun", "dyldinfo", "-rebase", "-bind", flag.Arg(0))
	out, err := cmd.CombinedOutput()
	if err != nil {
		log.Fatalf("%s: %v\n%s\n", cmd, err, out)
	}
	dirty := make(dirtyPages)
	scan := bufio.NewScanner(bytes.NewReader(out))
	for scan.Scan() {
		line := scan.Text()
		if !strings.Contains(line, "0x") {
			// Header line, skip.
			continue
		}
		// Rebase lines look like this:
		//
		// __DATA_CONST __rodata 0x1000810B8 pointer 0x1000728D0
		//
		// The fields are: segment, section, address, type, value.
		//
		// Bind lines look like this:
		//
		// __DATA __nl_symbol_ptr 0x1000C40A0 pointer 0 libSystem _mach_timebase_info
		//
		// The fields are: segment, section, address, type, addend, dylib, symbol.
		// Note that the symbol may contain spaces, so strings.Fields can have variable length.
		//
		// We want to keep track of addresses, which are conveniently always third.
		// The addresses are where the rebased value will be written.
		// We don't keep track of individual addresses;
		// rather, track whether any given page is dirty.
		fields := strings.Fields(line)
		if len(fields) < 3 {
			// A line that mentions "0x" but has no third field is not an
			// entry we understand; report it rather than crash on fields[2].
			log.Fatalf("unexpected dyldinfo output line: %q", line)
		}
		dirty.mark(fields[2])
	}
	// Scan returns false on error as well as at end of input; without this
	// check a scanner failure would silently produce an undercount.
	if err := scan.Err(); err != nil {
		log.Fatalf("reading dyldinfo output: %v", err)
	}
	fmt.Println(dirty.mem())
}
// pageSize is the size, in bytes, of a single memory page.
// NOTE(review): 4 KiB is assumed throughout; confirm this matches the
// page size of the target platform.
const pageSize = 4096

// dirtyPages is the set of memory pages known to be dirty.
// Each key is the base address of a page; pages are assumed to be 4k.
type dirtyPages map[uint64]bool

// mark records the page containing addr as dirty.
// addr must be a hex-encoded string, like "0x1000C4278";
// mark panics if addr cannot be parsed as an unsigned 64-bit integer.
func (d dirtyPages) mark(addr string) {
	value, err := strconv.ParseUint(addr, 0, 64)
	if err != nil {
		panic(err)
	}
	// Round down to the start of the enclosing page.
	base := value - value%pageSize
	d[base] = true
}

// mem returns the number of bytes covered by the dirty pages in d,
// i.e. the count of distinct pages multiplied by the page size.
func (d dirtyPages) mem() uint64 {
	pages := uint64(len(d))
	return pages * pageSize
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment