@jesboat
Created May 6, 2020 22:18
kernel debug notes
import json
import copy
import sys
from core.cvalue import cast, dereference, addressof, getfieldoffset, sizeof, unsigned
import xnu
from xnu import kern, IterateQueue, GetLLDBThreadForKernelThread, GetObjectAtIndexFromArray
ST_RUNNABLE = 'runnable'
ST_WAIT = 'wait'
ST_IDLE = 'idle'
class Queue(object):
"""
Efficient FIFO queue which supports adding new elements and iterating
simultaneously.
"""
def __init__(self, things=[]):
""" Creates a new queue, (optionally) pre-adding everything in things """
self.out_side = list(things)
self.out_idx = 0
self.in_side = []
def add(self, thing):
""" Add a single element to the end of the queue """
self.in_side.append(thing)
def __iter__(self):
"""
Iterate over items in the queue, including all items which are in the
queue at the start of the iter and any items which are added during the
course of the iteration. Items are removed from the queue when they are
returned by the iterator.
"""
while self.in_side or self.out_side:
if self.out_side:
while self.out_idx < len(self.out_side):
item = self.out_side[self.out_idx]
self.out_idx += 1
yield item
self.out_side = None
if self.in_side:
self.out_side = self.in_side
self.out_idx = 0
self.in_side = []
def size(self):
""" Returns the number of pending items in the queue. """
size = len(self.in_side)
if self.out_side:
size += len(self.out_side) - self.out_idx
return size
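# A (hypothetical) usage sketch: items enqueued while the queue is being
# iterated are still visited before the iteration ends, which is what
# FullIpcScanner.drain_queue() below relies on:
#
#   q = Queue(['first'])
#   for item in q:
#       if item == 'first':
#           q.add('second')   # discovered while handling 'first'
#   # the loop only ends once 'second' has also been yielded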
class WaitqScanner(object):
"""
Utility for scanning a collection of waitqs in order to build a map from
wqsets to the waitqs in them.
Conceptually, wqsets contain waitqs, and each waitq can be in zero or more
wqsets. However, xnu stores this inverted; instead of wqsets having a list
of their members, each waitq refers (through a somewhat complicated
datastructure) to the wqsets which directly contain it. WaitqScanner has
the logic for interpreting that datastructure, and the logic for building
the effective map from wqsets to their members.
Usage is:
scanner = WaitqScanner()
for waitq in waitqs:
scanner.process_waitq(waitq)
for wqset in wqsets:
# what you wish you could write
members = [wq for wq in waitqs if wqset.contains(wq)]
# what you can write
members = scanner.get_members_of_wqset(wqset)
"""
def __init__(self):
if hasattr(kern.globals, 'g_lt_idx_max'):
self.idx_mask = int(kern.globals.g_lt_idx_max)
else:
self.idx_mask = 0x000000000003ffff
# Cache from waitq_set_id to array of waitq_set proxies for the sets
# directly containing waitqs which have `waitq->waitq_set_id == that`.
self.setid_cache = {}
# Map from address of waitq_set to waitqs they hold
self.set_to_members = {}
def _read_id(self, id):
""" Given an ID, return a proxy for the `struct waitq_link` """
# borrowed from lldbmacros/ipc.py
idx = int(id & self.idx_mask)
slab_slot = idx / kern.globals.g_wqlinktable.slab_elem
slab = kern.globals.g_wqlinktable.table[int(slab_slot)]
addr = int(slab) + int(kern.globals.g_wqlinktable.elem_sz)*(
idx - cast(slab, 'lt_elem*').lt_id.idx)
return kern.GetValueFromAddress(addr, 'waitq_link *')
def _expand_setid(self, id):
"""
Given an ID (as in the waitq_set_id field of a `struct waitq` or in the
wql_link.left_setid or wql_link.right_setid of a `struct waitq_link`),
returns a list of proxies for all the wqsets which directly contain any
waitqs which have that ID in their waitq_set_id field.
"""
if id not in self.setid_cache:
self.setid_cache[id] = self._do_expand_setid(id)
return self.setid_cache[id]
def _do_expand_setid(self, id):
""" implementation of _expand_setid """
sets = []
id_queue = Queue([id])
for id in id_queue:
link = self._read_id(id)
wqe_type = {
0: 'FREE',
1: 'ELEM',
2: 'LINK',
3: 'RSVD'
}[(link.wqte.lt_bits >> 29) & 0x3]
if wqe_type == 'ELEM':
sets.append(link.wql_wqs.wql_set)
elif wqe_type == 'LINK':
id_queue.add(link.wql_link.left_setid)
id_queue.add(link.wql_link.right_setid)
else:
assert(wqe_type == 'FREE')
return sets
def process_waitq(self, waitq_proxy):
"""
Slurp in all data necessary to determine which wqsets contain that
waitq
"""
waitq_addr = int(waitq_proxy)
setid = int(waitq_proxy.waitq_set_id)
if setid:
wqsets = self._expand_setid(setid)
else:
wqsets = []
for wqset in wqsets:
wqset_addr = int(wqset)
self.set_to_members.setdefault(wqset_addr, {
'wqset_addr': wqset_addr,
'wqset_proxy': wqset,
'members': {},
})
self.set_to_members[wqset_addr]['members'][waitq_addr] = waitq_proxy
def get_members_of_wqset(self, wqset_proxy):
"""
Return all processed waitqs which are members of the wqset
"""
wqset_addr = int(wqset_proxy)
info = self.set_to_members.get(wqset_addr, None)
if info:
return info['members'].values()
else:
return []
class FullIpcScanner(object):
"""
Read and cross-reference basically everything about possible IPC. Used
like:
s = FullIpcScanner()
s.process_system()
All the data is accumulated in `s.store` but it isn't really in a
nice-to-consume format yet.
"""
def __init__(self):
self.queue = Queue()
self.store = {
'task': {},
'port': {},
'pset': {},
'kmsg': {},
}
self.waitq_scanner = WaitqScanner()
self.waitq_to_obj = {}
def process_system(self):
""" Read/process everything """
for task_proxy in kern.tasks:
self.discovered_object('task', task_proxy, 'system', 'tasks')
self.drain_queue()
for task_proxy in kern.terminated_tasks:
self.discovered_object('task', task_proxy, 'system', 'terminated_tasks')
self.store['task'][int(task_proxy)]['terminated'] = True
self.drain_queue()
self.finalize_psets()
def discovered_object(self, kind, proxy, example_from_kind, example_from_thing):
"""
Called when the first reference is discovered to some object `(kind, proxy)`.
Initializes storage for the object (so that any in/out references now known
can be recorded immediately) and enqueues it for later traversal.
"""
addr = int(proxy)
self.store[kind].setdefault(addr, {
'addr': addr,
'proxy': proxy,
'in': [],
'out': [],
})
self.queue.add((kind, proxy, example_from_kind, example_from_thing))
def drain_queue(self):
""" Process all enqueued objects until the queue is empty. """
for kind, proxy, example_from_kind, example_from_thing in self.queue:
sys.stderr.write(
"Debug: processing %s [%s] (referenced from %s [%s])...\n"
% (kind, proxy, example_from_kind, example_from_thing))
if kind == 'task':
self.process_task(proxy)
elif kind == 'port':
self.process_port(proxy)
elif kind == 'pset':
self.process_pset(proxy)
elif kind == 'kmsg':
self.process_kmsg(proxy)
else:
raise Exception()
def add_reference(self, frm, to, attributes):
"""
Called when a reference is discovered from some already-known object
`frm = (from_type, from_addr)` to some object `to = (to_kind, to_proxy)`
which may or may not yet be known.
"""
from_type, from_addr = frm
to_type, to_proxy = to
to_addr = int(to_proxy)
assert(from_addr in self.store[from_type])
if to_addr not in self.store[to_type]:
self.discovered_object(to_type, to_proxy, from_type, hex(from_addr))
self.store[from_type][from_addr]['out'].append({
'to_type': to_type,
'to_addr': to_addr,
'attributes': attributes,
})
self.store[to_type][to_addr]['in'].append({
'from_type': from_type,
'from_addr': from_addr,
'attributes': attributes,
})
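# For example (hypothetical addresses and name), after a task at 0xAAAA gains a
# send right named 1003 to a port at 0xBBBB, the store would contain roughly:
#
#   store['task'][0xAAAA]['out'] == [{'to_type': 'port', 'to_addr': 0xBBBB,
#                                     'attributes': {'name': '1003', 'rights': 'Sendmany'}}]
#   store['port'][0xBBBB]['in']  == [{'from_type': 'task', 'from_addr': 0xAAAA,
#                                     'attributes': {'name': '1003', 'rights': 'Sendmany'}}]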
def process_task(self, task_proxy):
""" Discover all out-references from the specified task """
task_addr = int(task_proxy)
bsd_info = cast(task_proxy.bsd_info, 'proc*')
if int(bsd_info):
task_name = str(bsd_info.p_name)
elif 'terminated' in self.store['task'][task_addr]:
task_name = '__terminated'
else:
task_name = '__unknown??'
self.store['task'][task_addr]['task_name'] = task_name
space_proxy = task_proxy.itk_space
num_entries = int(space_proxy.is_table_size)
table_proxy = space_proxy.is_table
sys.stderr.write("Processing task '%s' (approx %d ports)...\n" % (task_name, num_entries))
for idx in range(0, num_entries):
entry_proxy = GetObjectAtIndexFromArray(table_proxy, idx)
ie_bits = int(entry_proxy.ie_bits)
if ie_bits & 0x000f0000 and ie_bits & 0x001f0000:
entry_name = "{:x}".format((idx << 8 | ie_bits >> 24))
if ie_bits & 0x00100000:
pass # dead
elif ie_bits & 0x00080000:
right = 'Set'
pset_proxy = cast(entry_proxy.ie_object, 'ipc_pset*')
self.add_reference(('task', task_addr), ('pset', pset_proxy), {'name': entry_name})
else: # port
port_proxy = cast(entry_proxy.ie_object, 'ipc_port_t')
if ie_bits & 0x00010000:
if ie_bits & 0x00020000:
rights = 'SendmanyRecv'
else:
rights = 'Sendmany'
elif ie_bits & 0x00020000:
rights = 'Recv'
elif ie_bits & 0x00040000:
rights = 'Sendonce'
else:
rights = 'Unknown'
if int(entry_proxy.index.request):
requestsval = port_proxy.ip_requests
sorightval = requestsval[int(entry_proxy.index.request)].notify.port
soright_ptr = int(sorightval)
if soright_ptr != 0:
rights += '/dead'
if soright_ptr & 0x1: rights += '/sendPossibleArmed'
if soright_ptr & 0x2: rights += '/sendPossibleRequested'
if port_proxy.ip_nsrequest != 0: rights += '/nosendersNotificationRequested'
if port_proxy.ip_pdrequest != 0: rights += '/destroyNotificationRequested'
self.add_reference(('task', task_addr), ('port', port_proxy),
{'name': entry_name, 'rights': rights})
for specialkind, specials in [
('Sendmany', [
'itk_self', 'itk_host', 'itk_bootstrap',
'itk_seatbelt', 'itk_gssd', 'itk_debug_control', 'itk_task_access',
]),
('Naked', ['itk_nself']),
('Recv', ['itk_resume']),
]:
for special in specials:
port_proxy = getattr(task_proxy, special)
if int(port_proxy) > 0:
self.add_reference(('task', task_addr), ('port', port_proxy),
{'special': special, 'rights': specialkind})
tr_max = sizeof(task_proxy.itk_registered) / sizeof(task_proxy.itk_registered[0])
for idx in range(0, tr_max):
port_proxy = task_proxy.itk_registered[idx]
if int(port_proxy) > 0:
self.add_reference(('task', task_addr), ('port', port_proxy),
{'special': 'registered:' + str(idx), 'rights': 'Sendmany'})
ex_max = sizeof(task_proxy.exc_actions) / sizeof(task_proxy.exc_actions[0])
for idx in range(0, ex_max):
port_proxy = task_proxy.exc_actions[idx].port
if int(port_proxy) > 0:
self.add_reference(('task', task_addr), ('port', port_proxy),
{'special': 'exc:' + str(idx), 'rights': 'Sendmany'})
for thread_proxy in IterateQueue(task_proxy.threads, 'thread*', 'task_threads'):
if int(thread_proxy.ith_sself) > 0:
self.add_reference(('task', task_addr), ('port', thread_proxy.ith_self),
{'special': 'thread_self:' + hex(int(thread_proxy)),
'rights': 'Sendmany'})
if int(thread_proxy.ith_special_reply_port) > 0:
self.add_reference(('task', task_addr), ('port', thread_proxy.ith_special_reply_port),
{'special': 'thread_reply:' + hex(int(thread_proxy)),
'rights': 'Sendonce'})
if int(thread_proxy.ith_voucher) > 0:
vport = thread_proxy.ith_voucher.iv_port
if int(vport) > 0:
self.add_reference(('task', task_addr), ('port', vport),
{'special': 'thread_voucher:' + hex(int(thread_proxy)),
'rights': 'Voucher'})
if int(thread_proxy.exc_actions) > 0:
for idx in range(0, ex_max):
export = thread_proxy.exc_actions[idx].port
if int(export) > 0:
self.add_reference(('task', task_addr), ('port', export),
{'special': 'thread_ex' + hex(int(thread_proxy)) + ':' + str(idx),
'rights': 'Sendmany'})
def process_port(self, port_proxy):
""" Discover all out-references from the specified port """
if port_proxy.ip_messages.data.port.msgcount > 0:
kmsgheadp = cast(port_proxy.ip_messages.data.port.messages.ikmq_base, 'ipc_kmsg_t')
kmsgp = kmsgheadp
while unsigned(kmsgp) > 0:
self.add_reference(('port', int(port_proxy)), ('kmsg', kmsgp), {})
kmsgp = kmsgp.ikm_next
if kmsgp == kmsgheadp:
break
if int(port_proxy.ip_nsrequest):
self.add_reference(
('port', int(port_proxy)),
('port', cast(port_proxy.ip_nsrequest, 'ipc_port*')),
{'kind': 'no-senders'})
if int(port_proxy.ip_pdrequest):
self.add_reference(
('port', int(port_proxy)),
('port', cast(port_proxy.ip_pdrequest, 'ipc_port*')),
{'kind': 'port-death'})
if int(port_proxy.ip_requests):
table = cast(port_proxy.ip_requests, 'struct ipc_port_request *')
sz = int(table.name.size.its_size)
for i in range(sz):
if i == 0:
continue
ipr = table[i]
if int(ipr.name.name):
ipr_bits = int(ipr.notify.port) & 3
ipr_port = kern.GetValueFromAddress(int(ipr.notify.port) & ~3, 'ipc_port*')
if ipr_bits == 3: ## send-possible armed and requested
ipr_disp = "sendPossibleArmedRequested"
elif ipr_bits & 2: ## send-possible requested
ipr_disp = "sendPossibleRequested"
elif ipr_bits & 1: ## send-possible armed
ipr_disp = "sendPossibleArmed"
else:
ipr_disp = "iprOther"
self.add_reference(
('port', int(port_proxy)),
('port', ipr_port),
{'kind': ipr_disp})
waitq = port_proxy.ip_messages.data.port.waitq
self.waitq_scanner.process_waitq(waitq)
self.waitq_to_obj[int(waitq)] = ('port', int(port_proxy))
def process_pset(self, pset_proxy):
"""
Ensure that, once all ports and psets have been processed,
`self.finalize_psets()` will discover all references from the
specified pset.
"""
waitq = pset_proxy.ips_messages.data.pset.setq.wqset_q
self.waitq_scanner.process_waitq(waitq)
self.waitq_to_obj[int(waitq)] = ('pset', int(pset_proxy))
def finalize_psets(self):
""" Finish pset processing """
assert not self.queue.size(), 'should only be called at end'
for pset_addr, info in self.store['pset'].items():
pset_proxy = info['proxy']
wqset = pset_proxy.ips_messages.data.pset.setq
for waitq in self.waitq_scanner.get_members_of_wqset(wqset):
mem_type, mem_addr = self.waitq_to_obj[int(waitq)]
self.add_reference(
('pset', pset_addr),
(mem_type, mem_addr),
{})
assert not self.queue.size(), \
'psets should only be able to point to things we encountered already'
def process_kmsg(self, kmsg_proxy):
""" Discover all out-references from an in-flight message """
def disposition_str(disp):
disps = {
16: 'recv',
24: 'dispose-receive',
17: 'move-send',
19: 'copy-send',
20: 'make-send',
25: 'dispose-send',
18: 'send-once',
21: 'make-send-once',
26: 'dispose-send-once',
}
return (disps[disp] if disp in disps else 'unknown:' + str(disp))
kmsgh = dereference(kmsg_proxy.ikm_header)
if int(kmsgh.msgh_remote_port):
self.add_reference(
('kmsg', int(kmsg_proxy)),
('port', kmsgh.msgh_remote_port),
{'where': 'remote', 'disposition': disposition_str(kmsgh.msgh_bits & 0x1f)})
if int(kmsgh.msgh_local_port):
self.add_reference(
('kmsg', int(kmsg_proxy)),
('port', kmsgh.msgh_local_port),
{'where': 'local', 'disposition': disposition_str((kmsgh.msgh_bits & 0x1f00) >> 8)})
if int(kmsg_proxy.ikm_voucher):
self.add_reference(
('kmsg', int(kmsg_proxy)),
('port', kmsg_proxy.ikm_voucher),
{'where': 'voucher', 'disposition': 'voucher'})
if kmsgh.msgh_bits & 0x80000000:
body = cast(addressof(cast(addressof(kmsgh), 'char *')[sizeof(kmsgh)]), 'mach_msg_body_t *')
dsc_count = dereference(cast(body, 'uint32_t *'))
dschead = cast(addressof(cast(addressof(body[0]), 'char *')[sizeof('uint32_t')]), 'mach_msg_descriptor_t *')
dsc_list = []
for i in range(dsc_count):
dsc_list.append(dschead[i])
for dsc in dsc_list:
dsc_type = unsigned(dsc.type.type)
if dsc_type == 0: # port
disp = dsc.port.disposition
ports = [dsc.port.name]
elif dsc_type == 2: # ool port
disp = dsc.ool_ports.disposition
ports = [cast(dsc.ool_ports.address, 'ipc_port*')[pidx]
for pidx in range(dsc.ool_ports.count)]
else:
continue
for port in ports:
self.add_reference(
('kmsg', int(kmsg_proxy)),
('port', port),
{'where': 'sent', 'disposition': disposition_str(disp)})
def get_fork_child_info(child_thread_proxy):
"""
Given a thread which is waiting to become the child in a vfork (or
posix_spawn), try to find information about the parent.
During a posix_spawn-doing-exec, the parent first creates a new task and a
thread in the new task with continuation task_wait_to_return. Then, the
parent loads the image into the new task (if applicable), sets the child
thread's saved state so that a return_from_trap will leave the new thread's
userland at the entry point of the new image, and unwaits it. That means
that, for a decent interval, the information on what's going on is only
available in the parent. Furthermore, the only thing which can unwait the
child is the parent, which means if we're trying to find out what the child
is waiting for, we need to walk to the parent.
1. Parent is doing a fork. fork has called fork1 which has called cloneproc
which has called fork_create_child which has called thread_create_waiting
which has made the child thread. The next steps are for thread_create_waiting
to return to fork_create_child to return to cloneproc to return to fork1,
at which point fork1 can call thread_dup, but that hasn't happened yet. AFAICT
there is no way to associate the child with the parent thread (other than
looking at the parent process and hoping only one thread is forking, or handling
every possible intermediate state which seems unrealistic.)
2. Parent is doing a fork, and has gotten as far as thread_dup. We can identify
the parent thread by looking for which thread in the parent process has the same
thread-local segment (as the thread's userspace would see it.)
3. Parent is doing a posix_spawn with POSIX_SPAWN_SETEXEC, which makes
posix_spawn behave like an execve-without-fork. posix_spawn has called
fork_create_child which has called thread_create_waiting. The next steps
are for thread_create_waiting to return to fork_create_child to return to
posix_spawn which will stick it in `imgp->ip_new_thread`, but that hasn't
happened yet. As in 1, AFAICT there is no way to associate them.
4. Parent is doing posix_spawn with POSIX_SPAWN_SETEXEC and has gotten far enough
to set `imgp->ip_new_thread` to the new thread. Obvious.
5. Parent is doing posix_spawn normally. It has called posix_spawn which has
called fork1 which has called cloneproc which has called fork_create_child
which has called thread_create_waiting. The next steps are for
thread_create_waiting, fork_create_child, cloneproc, and fork1 to return; as part
of the tail of fork1, it writes (via ptr used as an out-arg) the new thread into
imgp->ip_new_thread, but that hasn't happened yet. As in 1, AFAICT there is no way
to associate them.
6. Parent is doing posix_spawn normally, and it has gotten far enough to set
imgp->ip_new_thread in the new thread. Obvious.
7. Parent is doing an exec unrelated to a fork. (Note that, while this does
not produce a new BSD process, it does produce a new Mach task/thread for
security reasons.) execve has called fork_create_child which has called
thread_create_waiting. The next step is for thread_create_waiting and
fork_create_child to return so execve can stick the new thread into
`imgp->ip_new_thread`, but that hasn't happened yet. Yada yada.
8. Parent is doing an exec unrelated to a fork, and has gotten far enough
to stick the new thread in `imgp->ip_new_thread`. This is PROBABLY easy to handle,
though I'm not sure exactly how the parent thread/task disappears.
9. vfork is a giant pile of horror which I haven't even started to understand yet.
The current implementation handles only a subset of case 6.
"""
for maybe_parent_task_proxy in kern.tasks:
maybe_parent_bsd = cast(maybe_parent_task_proxy.bsd_info, 'proc*')
for maybe_parent_thread_proxy in IterateQueue(maybe_parent_task_proxy.threads, 'thread *', 'task_threads'):
maybe_parent_thread_lldb = xnu.GetLLDBThreadForKernelThread(maybe_parent_thread_proxy)
for frame in maybe_parent_thread_lldb.frames:
if frame.name == 'exec_activate_image':
imgp_proxy = kern.GetValueFromAddress(
frame.GetValueForVariablePath('imgp').value,
'image_params*'
)
if int(child_thread_proxy) == int(imgp_proxy.ip_new_thread):
return {
'parent_thread': maybe_parent_thread_proxy,
'parent_task': maybe_parent_task_proxy,
'parent_task_pid': int(maybe_parent_bsd.p_pid),
'parent_task_name': str(maybe_parent_bsd.p_comm),
'child_exe_name': str(imgp_proxy.ip_vp.v_name),
'child_argv0': str(imgp_proxy.ip_startargv),
}
return None
# Places where a thread can eventually go after it finishes doing whatever it's
# currently in kernel for.
CONTINUATION_BASE_CASES = set([
# Thread will return to (or make an upcall into) userspace
'USERSPACE',
# Thread will terminate
'TERMINATE',
# Thread is a kernel thread which stays in kernel forever (or almost
# forever)
'KERNEL_DAEMON',
# The eventual destination of a thread is unknown
'UNKNOWN',
])
# Given the symbol name of a continuation, maps to a callable which takes the
# thread proxy blocked on that continuation and returns the /next steps/ after
# the continuation. Generally, the /next steps/ are where the thread will go
# once it is continued; in other words, we imagine that the kernel had been
# written so that this wait happened on an ordinary stack, and we return what
# the frame below the wait would then be. Code-wise, the /next steps/ are returned as either
# a string, a list of strings, or a proxy for another continuation.
CONTINUATION_TO_NEXT = {
'ipc_mqueue_receive_continue':
lambda thread_proxy: thread_proxy.saved.receive.continuation,
'workq_unpark_continue':
lambda thread_proxy: 'USERSPACE:wq',
'_sleep_continue':
lambda thread_proxy: cast(thread_proxy.uthread, 'uthread*').uu_continuation,
'kqueue_scan_continue':
lambda thread_proxy: cast(thread_proxy.uthread, 'uthread*').uu_save.uus_kqueue_scan.cont,
'IOWorkLoop::threadMain()':
lambda thread_proxy: 'TERMINATE',
'semaphore_wait_continue':
lambda thread_proxy: thread_proxy.saved.sema.continuation,
'thread_call_thread':
lambda thread_proxy: [
'thread_call_queue:' + str(thread_proxy.thc_state.thc_group.tcg_name),
'KERNEL_DAEMON',
],
'ulock_wait_continue':
lambda thread_proxy: 'unix_syscall_return',
'task_wait_to_return':
lambda thread_proxy: 'USERSPACE:fork_or_vfork',
'filt_wlwait_continue':
lambda thread_proxy: ['kevent_register_wait_return', 'unix_syscall_return'],
'psynch_cvcontinue':
lambda thread_proxy: 'USERSPACE:syscall',
'psynch_mtxcontinue':
lambda thread_proxy: 'unix_syscall_return',
'sigcontinue':
lambda thread_proxy: 'unix_syscall_return',
'unix_syscall_return':
lambda thread_proxy: 'USERSPACE:syscall',
'selcontinue':
lambda thread_proxy: ['_sleep_continue', 'unix_syscall_return'],
'wait4_nocancel':
lambda thread_proxy: ['_sleep_continue', 'unix_syscall_return'],
'thread_syscall_return':
lambda thread_proxy: 'USERSPACE:syscall',
'kevent_continue':
lambda thread_proxy: 'unix_syscall_return',
'__posix_sem_syscall_return':
lambda thread_proxy: 'unix_syscall_return',
'nxprov_detacher_cont':
lambda thread_proxy: 'UNKNOWN',
'kauth_resolver_getwork_continue':
lambda thread_proxy: [
'kauth_resolver_getwork2',
'kauth_resolver_getwork_continue',
'_sleep_continue',
'unix_syscall_return',
],
'wait1continue':
lambda thread_proxy: ['_sleep_continue', 'unix_syscall_return'],
}
# Set of continuations which are kernel daemons without doing anything
# complicated. Conceptually, the semantics of this are identical to
#
# for c in [this_list]:
# CONTINUATION_TO_NEXT[c] = lambda thread_proxy: 'KERNEL_DAEMON'
#
# but it's written this way for increased clarity and easier-to-read code.
CONTINUATIONS_KERNEL_DAEMONS = set([
'aio_work_thread',
'async_work_continue',
'audit_worker',
'bcleanbuf_thread',
'cfil_info_udp_expire',
'flowadv_thread_cont',
'ifnet_detacher_thread_cont',
'io_reprioritize_thread',
'mbuf_worker_thread',
'memorystatus_thread',
'nwk_wq_thread_cont',
'pf_purge_thread_cont',
'sched_timeshare_maintenance_continue',
'thread_call_daemon_continue',
'thread_exception_daemon',
'thread_stack_daemon',
'thread_terminate_daemon',
'vm_compressor_swap_trigger_thread',
'vm_object_reaper_thread',
'vm_pageout_continue',
'vm_pageout_garbage_collect',
'vm_pageout_iothread_external_continue',
'vm_pageout_iothread_internal_continue',
'vm_pressure_thread',
'vm_swapfile_create_thread',
'vm_swapfile_gc_thread',
'vm_swapout_thread',
])
def trace_kstack(thread_proxy):
"""
Given a waiting thread, trace where it will go after it's unblocked.
For the many threads which have normal kernel stacks, we just return the
stack; for threads which gave up their stack and saved an explicit continuation, we have dedicated
handling which understands what the kernel's common continuations will do
when they are resumed.
"""
if int(thread_proxy.kernel_stack):
return [str(frame.function.addr) for frame in GetLLDBThreadForKernelThread(thread_proxy).frames]
is_kernel = (int(kern.GetGlobalVariable('kernel_task')) == int(thread_proxy.task))
def continuation_to_name(cont):
if isinstance(cont, basestring):
return cont
else:
cont_symbol_maybe = xnu.kern.SymbolicateFromAddress(int(cont))
if cont_symbol_maybe:
return cont_symbol_maybe[0].name
else:
return 'UNKNOWN:0x' + str(cont)
fakestack = [continuation_to_name(thread_proxy.continuation)]
while True:
last_name = fakestack[-1]
if last_name.split(':')[0] in CONTINUATION_BASE_CASES:
break
elif len(fakestack) > 100:
fakestack.append('RECURSION')
return fakestack
if last_name in CONTINUATION_TO_NEXT:
nexts = CONTINUATION_TO_NEXT[last_name](thread_proxy)
if not isinstance(nexts, list):
nexts = [nexts]
elif last_name in CONTINUATIONS_KERNEL_DAEMONS and is_kernel:
nexts = ['KERNEL_DAEMON']
else:
nexts = ['UNKNOWN']
nexts = map(continuation_to_name, nexts)
assert nexts and all([isinstance(x, basestring) for x in nexts])
fakestack.extend(nexts)
if fakestack[-1] == 'USERSPACE:fork_or_vfork':
fork_info = get_fork_child_info(thread_proxy)
if fork_info:
fakestack[-1] += (
(':parent=%d,%s,%x' % (
fork_info['parent_task_pid'],
fork_info['parent_task_name'],
int(fork_info['parent_thread']),
)) +
(':child=%s/%s' % (
fork_info['child_exe_name'],
fork_info['child_argv0'],
))
)
return fakestack
def analyze_threads():
"""
Do basic analysis on all threads in the kernel figuring out what (if anything)
they're blocked on.
This is not integrated with the system-wide analysis of IPC state.
Returns a list of dicts each of which contains basic information about a
thread and is suitable for aggregation or more detailed analysis.
"""
threads = []
kernel_task_proxy = kern.GetGlobalVariable('kernel_task')
for task_proxy in kern.tasks:
proc_proxy = dereference(cast(task_proxy.bsd_info, 'proc*'))
is_kernel = (int(task_proxy) == int(kernel_task_proxy))
for thread_proxy in IterateQueue(task_proxy.threads, 'thread *', 'task_threads'):
is_wait = bool(thread_proxy.state & 0x1)
is_idle = bool(thread_proxy.state & 0x80)
task_name = str(proc_proxy.p_name)
assert(not is_wait or not is_idle)
thri = {
'proxy': thread_proxy,
'task': task_proxy,
'task_name': task_name,
'state': (ST_IDLE if is_idle else ST_WAIT if is_wait else ST_RUNNABLE),
'wait': ({} if is_wait else None),
'is_kernel': is_kernel,
}
if is_wait:
if int(thread_proxy.wait_timer_is_set.sbvalue.value):
thri['wait']['timer'] = int(thread_proxy.wait_timer.call_entry.deadline.sbvalue.value)
else:
thri['wait']['timer'] = None
thri['wait']['block_hint'] = thread_proxy.block_hint.sbvalue.value
thri['wait']['kstack'] = trace_kstack(thread_proxy)
if int(thread_proxy.kernel_stack) == 0:
thri['wait']['continuation'] = thri['wait']['kstack'][0]
else:
thri['wait']['continuation'] = None
threads.append(thri)
return threads
def summarize(thri):
"""
Given a thread-info (as returned by analyze_threads), return a summary
which is intended to be sufficiently detailed to help debugging but
sufficiently vague that it can be aggregated efficiently.
"""
if thri['state'] == ST_IDLE:
return 'idle'
elif thri['state'] == ST_RUNNABLE:
return 'runnable'
else:
assert(thri['state'] == ST_WAIT)
summary = {
'timer': bool(thri['wait']['timer']),
'resume': thri['wait']['kstack'],
'is_kernel': thri['is_kernel'],
'block_hint': (thri['wait']['block_hint'] if thri['wait']['block_hint'] != 'kThreadWaitNone' else None),
}
return summary
def analyze_receiving_thread(thread_proxy):
"""
Given a thread waiting in kThreadWaitPortReceive, find the port or portset
it's trying to receive something from.
"""
assert(str(thread_proxy.block_hint.sbvalue.value) == 'kThreadWaitPortReceive')
waitq = thread_proxy.waitq
waitq_type = int(waitq.waitq_type)
if waitq_type == 3:
mqueue = kern.GetValueFromAddress(
hex(int(waitq) - getfieldoffset('struct ipc_mqueue', 'data.pset.setq')),
'struct ipc_mqueue*')
pset = kern.GetValueFromAddress(
hex(int(mqueue) - getfieldoffset('struct ipc_pset', 'ips_messages')),
'struct ipc_pset*')
return ('pset', pset)
elif waitq_type == 2:
mqueue = kern.GetValueFromAddress(
hex(int(waitq) - getfieldoffset('struct ipc_mqueue', 'data.port.waitq')),
'struct ipc_mqueue*')
port = kern.GetValueFromAddress(
hex(int(mqueue) - getfieldoffset('struct ipc_port', 'ip_messages')),
'struct ipc_port*')
return ('port', port)
else:
assert False, 'Unknown waitq_type ' + str(waitq_type)
def dumps(thing):
"""Dump to json canonically"""
return json.dumps(thing, sort_keys=True)
def histogram(things, keyfn=(lambda v: v)):
"""
Return a histogram of values, grouped by some key.
This is the moral equivalent of
SELECT key, arbitrary(value), count(*)
FROM (SELECT keyfn(value) as key, value FROM things)
GROUP BY key
Params:
things: iterable of things to make a histogram from
keyfn: function from thing to key to group them by, which
must be suitable as the key in a dict
Return:
dict mapping keys (as returned by keyfn) to dicts of the form
{'val': some_thing_with_that_key, 'count': number_of_things_with_that_key}
"""
ret = {}
for thing in things:
key = keyfn(thing)
info = ret.setdefault(key, {'val': thing, 'count': 0})
info['count'] = info['count'] + 1
return ret
def print_histogram(histog):
""" Render a histogram to the screen. """
for key, info in sorted(
histog.items(),
key=(lambda entry: (-entry[1]['count'], entry[0])),
):
print("%d\t%r\n" % (info['count'], info['val']))
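# A (hypothetical) example of how these pieces are meant to be combined from
# the lldb kernel-debugging Python prompt, assuming this module's names are in
# scope there; none of these exact invocations appear above, they are just one
# plausible workflow:
#
#   # Bucket every thread in the system by a vague-but-useful summary and
#   # print the most common states first:
#   thris = analyze_threads()
#   print_histogram(histogram([summarize(t) for t in thris], keyfn=dumps))
#
#   # For threads stuck receiving, find which port/pset they are blocked on:
#   receivers = [t for t in thris
#                if t['state'] == ST_WAIT
#                and t['wait']['block_hint'] == 'kThreadWaitPortReceive']
#   kind, obj = analyze_receiving_thread(receivers[0]['proxy'])
#
#   # Cross-reference the whole IPC graph (tasks, ports, psets, in-flight
#   # messages); results accumulate in s.store:
#   s = FullIpcScanner()
#   s.process_system()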
#!/usr/bin/env perl
use strict;
use warnings;
use 5.010;
use autodie;
use Fcntl 'SEEK_SET', 'SEEK_CUR', 'SEEK_END';
use List::Util 'min';
use YAML::XS 'Dump';
# Magic number at start of file, encoded big-endian, as if a string. In C,
#
# #define MACH_CORE_FILEHEADER_SIGNATURE 0x0063614d20646152ULL
my $MACH_CORE_FILEHEADER_SIGNATURE_BE = pack('H*', '0063614d20646152');
# Magic number at start of file, encoded little-endian, as if a string. In C,
#
# #define MACH_CORE_FILEHEADER_SIGNATURE 0x0063614d20646152ULL
my $MACH_CORE_FILEHEADER_SIGNATURE_LE =
reverse($MACH_CORE_FILEHEADER_SIGNATURE_BE);
# Maximum number of subfiles in dump. In C,
#
# #define MACH_CORE_FILEHEADER_MAXFILES 16
my $MACH_CORE_FILEHEADER_MAXFILES = 16;
# Maximum length of a dump subfile's filename. In C,
#
# #define MACH_CORE_FILEHEADER_NAMELEN 16
my $MACH_CORE_FILEHEADER_NAMELEN = 16;
# Pack pattern for a `struct mach_core_details` part of a dump header, if
# encoded big-endian. In C,
#
# struct mach_core_details {
# uint64_t gzip_offset;
# uint64_t gzip_length;
# char core_name[MACH_CORE_FILEHEADER_NAMELEN];
# };
my $MACH_CORE_DETAILS_PACKPAT_BE = "(QQ)>Z$MACH_CORE_FILEHEADER_NAMELEN";
# Size of a `struct mach_core_details` part of a dump header, in bytes.
my $MACH_CORE_DETAILS_SIZE = 8 + 8 + $MACH_CORE_FILEHEADER_NAMELEN;
# Pack pattern for a `struct mach_core_fileheader` dump header, treating
# the embedded `struct mach_core_details` as opaque, if encoded in big-endian.
# In C,
#
# struct mach_core_fileheader {
# uint64_t signature;
# uint64_t log_offset;
# uint64_t log_length;
# uint64_t num_files;
# // treated as `char files[MACH_CORE_FILEHEADER_MAXFILES][sizeof(struct mach_core_details)]`
# struct mach_core_details files[MACH_CORE_FILEHEADER_MAXFILES];
# };
#
my $MACH_CORE_FILEHEADER_PACKPAT_BE =
"(QQQQ)>(a$MACH_CORE_DETAILS_SIZE)$MACH_CORE_FILEHEADER_MAXFILES";
# Size of a `struct mach_core_fileheader` header, in bytes (including the
# contained `struct mach_core_details`s.)
my $MACH_CORE_FILEHEADER_SIZE =
8 + 8 + 8 + 8
+ $MACH_CORE_DETAILS_SIZE * $MACH_CORE_FILEHEADER_MAXFILES;
sub bswap64 {
my ($num) = @_;
return unpack('Q<', pack('Q>', $num));
}
sub parse_header {
my ($filename, $fh) = @_;
seek($fh, 0, SEEK_SET);
my $read = read($fh, my($header_buf), $MACH_CORE_FILEHEADER_SIZE);
$read == $MACH_CORE_FILEHEADER_SIZE
or die "$0: fatal: $filename: unexpected EOF\n";
my $little_endian = {
$MACH_CORE_FILEHEADER_SIGNATURE_BE => 0,
$MACH_CORE_FILEHEADER_SIGNATURE_LE => 1,
}->{substr($header_buf, 0, length($MACH_CORE_FILEHEADER_SIGNATURE_BE))};
if (not defined $little_endian) {
die "$0: fatal: $filename: bad signature\n";
}
my ($signature, $log_offset, $log_length, $num_files, @file_bufs) =
unpack($MACH_CORE_FILEHEADER_PACKPAT_BE, $header_buf);
if ($little_endian) {
$signature = bswap64($signature);
$log_offset = bswap64($log_offset);
$log_length = bswap64($log_length);
$num_files = bswap64($num_files);
}
if ($num_files > @file_bufs) {
die "$0: fatal: $filename: claims to have too many ($num_files) files\n";
} elsif ($num_files == 0) {
warn "$0: warning: $filename: claims to have no subfiles\n";
}
my $file_infos = [];
for my $i (0 .. ($num_files - 1)) {
my ($gzip_offset, $gzip_length, $name) =
unpack($MACH_CORE_DETAILS_PACKPAT_BE, $file_bufs[$i]);
if ($little_endian) {
$gzip_offset = bswap64($gzip_offset);
$gzip_length = bswap64($gzip_length);
}
push @$file_infos, {
gzip_offset => $gzip_offset,
gzip_length => $gzip_length,
name => $name,
};
}
return {
signature => $signature,
log_offset => $log_offset,
log_length => $log_length,
file_infos => $file_infos,
};
}
sub extract_range {
my ($filename, $fh, $offset, $length, $outdir, $outfilename) = @_;
seek($fh, $offset, SEEK_SET);
open(my($outfh), '>', "$outdir/$outfilename");
my $last_nz = 0;
for (my $wrote = 0; $wrote < $length;) {
my $bufsize = min($length - $wrote, 4096);
my $read = read($fh, my($buf), $bufsize);
$read == $bufsize
or die "$0: fatal: $filename: short read while extracting $outfilename\n";
if ($buf =~ /[^\x00]/) {
print {$outfh} $buf;
$last_nz = $wrote + $bufsize;
} else {
# `$buf` is entirely null. Make sparse output file.
truncate($outfh, $wrote + $bufsize);
seek($outfh, $bufsize, SEEK_CUR);
}
$wrote += $bufsize;
}
if ($outfilename eq '_trailing.bin' and $last_nz < $length) {
truncate($outfh, $last_nz);
}
close($outfh);
return {begin => $offset, end => $offset + $length, name => $outfilename};
}
sub extract_subfile {
my ($filename, $fh, $file_info, $outdir) = @_;
my $offset = $file_info->{gzip_offset};
my $length = $file_info->{gzip_length};
my $origname = $file_info->{name};
length($origname)
or die "$0: fatal: $filename: subfile with empty name\n";
($origname !~ /^[_.]/)
or die "$0: fatal: $filename: subfile with reserved name $origname\n";
my $outfilename = "$origname.gz";
if (-f "$outdir/$outfilename") {
die "$0: fatal: $filename: multiple subfiles named $origname\n";
}
return extract_range($filename, $fh, $offset, $length, $outdir, $outfilename);
}
sub extract_unclaimed {
my ($filename, $fh, $claimed_ranges, $outdir) = @_;
seek($fh, 0, SEEK_END);
my $flen = tell($fh);
# Compute all ranges of `[0, $flen)` which are not contained within any
# range `[$i->{begin}, $i->{end}) for $i in @$claimed_ranges`. The
# algorithm is quadratic, but bounded by O(MACH_CORE_FILEHEADER_MAXFILES)
# so meh.
my $unclaimed_ranges = [];
my $unclaimed_name_counter = 0;
my $offset = 0;
RANGE:
while ($offset < $flen)
{
# See if `[$offset, $offset+1)` is in any claimed range
for my $range (@$claimed_ranges) {
if ($range->{begin} <= $offset and $offset < $range->{end}) {
# If so, skip ahead to the end of that range
$offset = $range->{end};
next RANGE;
}
}
# If not, figure out which claimed range starts next
# after $offset.
my $starts_next;
for my $range (@$claimed_ranges) {
if ($range->{begin} > $offset) {
if (not($starts_next) or $range->{begin} < $starts_next->{begin}) {
$starts_next = $range;
}
}
}
# Was there one?
if ($starts_next) {
# If there was any, then `[$offset, $starts_next->{begin})` is unclaimed
push @$unclaimed_ranges, {
begin => $offset,
end => $starts_next->{begin},
name => sprintf('_bonus%02u.bin', $unclaimed_name_counter++),
};
$offset = $starts_next->{begin};
} else {
# No range starts after => here to EOF is unclaimed
push @$unclaimed_ranges, {
begin => $offset,
end => $flen,
name => '_trailing.bin',
};
$offset = $flen;
}
}
# Write them out, and claim them as we do.
for my $unclaimed (@$unclaimed_ranges) {
push @$claimed_ranges, extract_range(
$filename, $fh,
$unclaimed->{begin}, $unclaimed->{end} - $unclaimed->{begin},
$outdir, $unclaimed->{name},
);
}
}
sub write_manifest {
my ($filename, $fh, $claimed_ranges, $header, $outdir) = @_;
my $manifest = {
dumpname => $filename,
files => [
sort {
$a->{begin} <=> $b->{begin}
or
$a->{end} <=> $b->{end}
or
$a->{name} cmp $b->{name}
}
@$claimed_ranges
],
};
open my($outfh), '>', "$outdir/_manifest.yaml";
print {$outfh} Dump($manifest);
close($outfh);
}
sub extract_all_parts {
my ($filename, $fh, $outdir) = @_;
my $header = parse_header($filename, $fh);
my $claimed_ranges = [];
push @$claimed_ranges, extract_range(
$filename, $fh,
0, $MACH_CORE_FILEHEADER_SIZE,
$outdir, "_header.bin",
);
push @$claimed_ranges, extract_range(
$filename, $fh,
$header->{log_offset}, $header->{log_length},
$outdir, "_panic.log",
);
for my $file_info (@{$header->{file_infos}}) {
push @$claimed_ranges,
extract_subfile($filename, $fh, $file_info, $outdir);
}
extract_unclaimed($filename, $fh, $claimed_ranges, $outdir);
write_manifest($filename, $fh, $claimed_ranges, $header, $outdir);
}
sub extract {
my ($infile, $outdir) = @_;
if (-e $outdir) {
die "$0: fatal: output directory already exists\n";
}
mkdir($outdir);
open my($infh), "<", $infile;
extract_all_parts($infile, $infh, $outdir);
}
sub main {
my (@args) = @_;
@args == 2
or die "Usage: $0 infile outdir\n";
extract($args[0], $args[1]);
return 0;
}
exit(main(@ARGV)) if not caller;

Reboot to recovery mode. Turn off SIP and secure boot. Boot back to normal mode.

Go to https://developer.apple.com/download/more/ and click "More" in the upper-right. Search for "Kernel". Download and install the Kernel Debug Kit (KDK) for your build of macOS (run sw_vers on the command line to get the build number). A nearby KDK will often work; installing one is harmless.

For the KDK, an exact build-number match is best and should definitely work. Otherwise, after installing it, you can check whether the release kernel in the KDK (e.g. under /Library/Developer/KDKs/KDK_10.14.6_18G2016.kdk/System/Library/Kernels/) matches the actual kernel (/System/Library/Kernels/kernel) with diff/shasum/whatever. If they match, the KDK will probably work. If they don't, the KDK might still work. Probably nothing horrible will happen if you try and it doesn't.
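
For example (using the KDK build shown above; adjust the path for yours), something like

shasum /Library/Developer/KDKs/KDK_10.14.6_18G2016.kdk/System/Library/Kernels/kernel /System/Library/Kernels/kernel

should print the same hash for both files if they match.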

Once you decide to use a KDK (or not), install the development kernel:

sudo mount -uw /
sudo install /Library/Developer/KDKs/KDK_10.14.6_18G2016.kdk/System/Library/Kernels/kernel.development /System/Library/Kernels/
sudo kextcache -invalidate /

Next, allocate some space for a core dump partition. Run diskutil list. The standard configuration will look something like

/dev/disk0 (internal):
   #:                       TYPE NAME                    SIZE       IDENTIFIER
   0:      GUID_partition_scheme                         1.0 TB     disk0
   1:                        EFI EFI                     314.6 MB   disk0s1
   2:                 Apple_APFS Container disk1         997.0 GB   disk0s2

/dev/disk1 (synthesized):
   #:                       TYPE NAME                    SIZE       IDENTIFIER
   0:      APFS Container Scheme -                      +997.0 GB   disk1
                                 Physical Store disk0s2
   1:                APFS Volume Preboot                 43.5 MB    disk1s2
   2:                APFS Volume Recovery                510.4 MB   disk1s3
   3:                APFS Volume VM                      18.3 GB    disk1s4
   4:                APFS Volume your-root-volume-name-here            439.5 GB   disk1s5

This may look different on Catalina. You need to shrink the APFS container a bit to make room for the kernel core partition. Kernel cores don't include most userspace memory and are compressed, so the partition doesn't need to be huge. (I have ~4GB for a machine with 32GB RAM.)

You can run diskutil apfs list disk1 to see the current exact size of the PV ("size" under the "Physical Store" entry) or diskutil apfs resizeContainer disk0s2 limits to see the limits (adjust disk1 and disk0s2 as appropriate.) If there is less space available according to limits than df, you might need to delete some time machine snapshots; see sudo tmutil listlocalsnapshotdates and sudo tmutil deletelocalsnapshots DATE.

Once you get the physical store's current size and decide how big a core dump partition you want, run something like

diskutil apfs resizeContainer \
    disk0s2 \
    $(( 1000240963584 - 1024 * 1024 * 1024 * 4 )) \
    %5361644D-6163-11AA-AA11-00306543ECAC% kcores 0

which would shrink disk0s2 from a starting size of 1000240963584 bytes down by 4GB, and then allocate a core dump partition in the remaining space. (If you already have an atypical configuration with free space after the APFS physical store, I'm not sure whether this would make the core dump partition take up that free space or just the space reclaimed from APFS.)

You can actually do this live; however, your system will freeze for a bit during the resize. It will unfreeze eventually.

After that, diskutil list for the SSD should look something like

/dev/disk0 (internal):
   #:                       TYPE NAME                    SIZE       IDENTIFIER
   0:      GUID_partition_scheme                         1.0 TB     disk0
   1:                        EFI EFI                     314.6 MB   disk0s1
   2:                 Apple_APFS Container disk1         997.0 GB   disk0s2
   3:       Apple_KernelCoreDump                         3.2 GB     disk0s3

I recommend zeroing the partition first (e.g. sudo dd if=/dev/zero bs=$(( 1024 * 1024 )) of=/dev/disk0s3); make sure you point it at the right device.

Finally, run

sudo nvram boot-args="-v msgbuf=1048576 kcsuffix=development debug=0x84d44"

The -v turns on verbose mode (prints kernel and bootloader stuff at boot/shutdown instead of hiding it with a splash screen). The msgbuf increases the size of the dmesg buffer so that all the startup stuff gets captured at boot. The kcsuffix=development tells the bootloader to load kernel.development instead of kernel, and the debug flags break down into

Pick:
    DB_ARP                  0x00040          useful if I want to do network panics later
    DB_NMI                  0x00004          NMI (and some other events) enters Debugger
    DB_LOG_PI_SCRN          0x00100          log panic info to the screen
    DB_KERN_DUMP_ON_PANIC   0x00400          panic triggers core dump
    DB_KERN_DUMP_ON_NMI     0x00800          NMI triggers core dump too
    DB_REBOOT_POST_CORE     0x04000          reboot post core (so you know when it's done)
    DB_DISABLE_CROSS_PANIC  0x80000          maybe necessary for machines with bridgeos?
    ----
                            0x84d44
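
As a quick sanity check that those flags OR together to the value used above:

python -c 'print(hex(0x00040 | 0x00004 | 0x00100 | 0x00400 | 0x00800 | 0x04000 | 0x80000))'   # prints 0x84d44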

The result is that, when the machine panics or when you do the NMI keystroke (cmd-ctrl-opt-shift-esc, which is not actually an NMI), it will appear to freeze, write a core dump to disk0s3, and then reboot.

Once you get a dump, copy it from the partition into a file with dd and then extract it using extract-kdump.pl, e.g.

sudo dd if=/dev/disk0s3 bs=$(( 1024 * 1024 )) of=blah-20200506-whatever.kdumpimg
sudo chown $(whoami) blah-20200506-whatever.kdumpimg
./extract-kdump.pl blah-20200506-whatever.kdumpimg blah-20200506-whatever.kdumpdir

The actual core dump comes out in blah-20200506-whatever.kdumpdir/kernel.gz.

You can load it with lldb; the KDK ships with a bunch of macros which are definitely easy to use and very well documented. You might need to run settings set target.load-script-from-symbol-file true in lldb. Some of the tools Apple built for the macros are actually pretty general, and you can use them to build nontrivial analyses. For example, I wrote example.py last summer to debug some recurring system deadlocks caused by Santa.
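
For example (paths are illustrative, reusing the KDK build and dump names from above), loading the dump looks something like

gunzip blah-20200506-whatever.kdumpdir/kernel.gz
lldb
(lldb) settings set target.load-script-from-symbol-file true
(lldb) target create /Library/Developer/KDKs/KDK_10.14.6_18G2016.kdk/System/Library/Kernels/kernel.development --core blah-20200506-whatever.kdumpdir/kernel
(lldb) showalltasks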
