Last active
February 19, 2023 14:21
-
-
Save josharian/4ed18ab0c2dd84b51cb45de14e8be36c to your computer and use it in GitHub Desktop.
calculate dirty data pages for macho binary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Command iossize predicts the impact
// on memory usage of changes to the binary.
//
// This implementation uses dyldinfo to determine
// exactly how much memory the dynamic linker will dirty
// as part of launching the executable.
//
// It is not perfect. There are multiple sources of dirty pages:
//
//  1. dynamic loader rebase/relocs due to our code
//  2. dynamic loader rebase/relocs due to dyld itself
//     and other linked frameworks
//  3. writes to global bss variables, including by
//     Go init functions
//  4. persistent allocations by the Go runtime
//  5. GC-able allocations by regular Go code
//
// (1) and (2) can (in theory) be measured statically.
// (3), (4), and (5) must be measured at run time.
//
// iossize measures (1).
// (2) is generally minimal.
// There is no good tooling I am aware of to measure (3) or (4).
// (5) can be measured using pprof.
package main | |
import ( | |
"bufio" | |
"bytes" | |
"flag" | |
"fmt" | |
"log" | |
"os/exec" | |
"strconv" | |
"strings" | |
) | |
func main() { | |
flag.Parse() | |
if flag.NArg() != 1 { | |
log.Fatalf("usage: iossize <binary>") | |
} | |
cmd := exec.Command("xcrun", "dyldinfo", "-rebase", "-bind", flag.Arg(0)) | |
out, err := cmd.CombinedOutput() | |
if err != nil { | |
log.Fatalf("%s: %v\n%s\n", cmd, err, out) | |
} | |
dirty := make(dirtyPages) | |
scan := bufio.NewScanner(bytes.NewReader(out)) | |
for scan.Scan() { | |
line := scan.Text() | |
if !strings.Contains(line, "0x") { | |
// Header line, skip. | |
continue | |
} | |
// Rebase lines look like this: | |
// | |
// __DATA_CONST __rodata 0x1000810B8 pointer 0x1000728D0 | |
// | |
// The fields are: segment, section, address, type, value. | |
// | |
// Bind lines look like this: | |
// | |
// __DATA __nl_symbol_ptr 0x1000C40A0 pointer 0 libSystem _mach_timebase_info | |
// | |
// The fields are: segment, section, address, type, addend, dylib, symbol. | |
// Note that the symbol may contain spaces, so strings.Fields can have variable length. | |
// | |
// We want to keep track of addresses, which are conveniently always third. | |
// The addresses are where the rebased value will be written. | |
// We don't keep track of individual addresses; | |
// rather, track whether any given page is dirty. | |
fields := strings.Fields(line) | |
dirty.mark(fields[2]) | |
} | |
fmt.Println(dirty.mem()) | |
} | |
// pageSize is the page granularity, in bytes, used to bucket addresses.
// NOTE(review): this is a fixed 4 KiB assumption; confirm it matches the
// page size of the target platform.
const pageSize = 1 << 12

// dirtyPages is the set of dirty memory pages.
// Each key is the base address of a page, i.e. an address rounded down
// to a multiple of pageSize. It assumes 4k pages.
type dirtyPages map[uint64]bool

// mark records the memory page that contains addr as dirty.
// addr must be a hex-encoded string, like "0x1000C4278";
// mark panics if addr cannot be parsed as an unsigned 64-bit integer.
func (d dirtyPages) mark(addr string) {
	parsed, err := strconv.ParseUint(addr, 0, 64)
	if err != nil {
		panic(err)
	}
	// Round down to the start of the enclosing page.
	base := parsed - parsed%pageSize
	d[base] = true
}

// mem returns the total amount of dirty memory represented by d, in bytes:
// the number of distinct pages marked so far multiplied by pageSize.
func (d dirtyPages) mem() uint64 {
	pages := uint64(len(d))
	return pages * pageSize
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment