@akshithg
Created March 7, 2023 05:41
Cases of non-determinism in the kernel
---#
arch/x86/kernel/process.c:119
---
static int set_new_tls(struct task_struct *p, unsigned long tls)
{
struct user_desc __user *utls = (struct user_desc __user *)tls;
if (in_ia32_syscall()) ## x ##
return do_set_thread_area(p, -1, utls, 0);
else
return do_set_thread_area_64(p, ARCH_SET_FS, tls);
}
---
`in_ia32_syscall()` checks whether the current process is in a 32-bit syscall.
If so, `do_set_thread_area()` is used to set the `tls` (Thread Local Storage).
Otherwise, `do_set_thread_area_64()` is used to set the `tls`.
---#
arch/x86/kernel/cpu/mce/core.c:1519
---
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
unsigned long iv;
WARN_ON(cpu_t != t);
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv);
goto done;
}
}
/*
* Alert userspace if needed. If we logged an MCE, reduce the polling
* interval, otherwise increase the polling interval.
*/
if (mce_notify_irq())
iv = max(iv / 2, (unsigned long) HZ/100); ## x ##
else
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
done:
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
---
`mce_notify_irq()` checks whether an MCE (Machine Check Exception) was logged. If
so, the polling interval is halved (poll more often), clamped at HZ/100. Otherwise
it is doubled (poll less often), clamped at check_interval * HZ.
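A rough stand-alone sketch of that clamped halve/double adjustment (the HZ and
check_interval values below are illustrative stand-ins, not read from a kernel
config):

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's HZ and check_interval. */
#define HZ             250UL
#define CHECK_INTERVAL 300UL   /* seconds */

static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }
static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }

int main(void)
{
    unsigned long iv = CHECK_INTERVAL * HZ;       /* start at the slow rate */

    /* an MCE was logged: poll twice as often, but no faster than HZ/100 */
    iv = max_ul(iv / 2, HZ / 100);
    printf("after an event: %lu jiffies\n", iv);

    /* quiet period: back off, but never slower than check_interval seconds */
    iv = min_ul(iv * 2, CHECK_INTERVAL * HZ);
    printf("after a quiet tick: %lu jiffies\n", iv);
    return 0;
}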
---#
arch/x86/kernel/hpet.c:699
---
static u64 read_hpet(struct clocksource *cs)
{
unsigned long flags;
union hpet_lock old, new;
BUILD_BUG_ON(sizeof(union hpet_lock) != 8);
/*
* Read HPET directly if in NMI.
*/
if (in_nmi())
return (u64)hpet_readl(HPET_COUNTER);
/*
* Read the current state of the lock and HPET value atomically.
*/
old.lockval = READ_ONCE(hpet.lockval); ## x ##
if (arch_spin_is_locked(&old.lock))
goto contended;
local_irq_save(flags);
if (arch_spin_trylock(&hpet.lock)) {
new.value = hpet_readl(HPET_COUNTER);
/*
* Use WRITE_ONCE() to prevent store tearing.
*/
WRITE_ONCE(hpet.value, new.value);
arch_spin_unlock(&hpet.lock);
local_irq_restore(flags);
return (u64)new.value;
}
local_irq_restore(flags);
contended:
/*
* Contended case
* --------------
* Wait until the HPET value change or the lock is free to indicate
* its value is up-to-date.
*
* It is possible that old.value has already contained the latest
* HPET value while the lock holder was in the process of releasing
* the lock. Checking for lock state change will enable us to return
* the value immediately instead of waiting for the next HPET reader
* to come along.
*/
do {
cpu_relax();
new.lockval = READ_ONCE(hpet.lockval);
} while ((new.value == old.value) && arch_spin_is_locked(&new.lock));
return (u64)new.value;
}
---
`READ_ONCE()` reads a variable with a single volatile access, so the compiler may
not merge, refetch, or tear the load.
https://www.kernel.org/doc/Documentation/memory-barriers.txt
If `in_nmi()` is true, the HPET counter is read directly. Otherwise the value is
read under `arch_spin_trylock()`; if the lock is contended, the reader spins until
the cached value changes or the lock is released.
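A minimal user-space sketch of the idea behind `READ_ONCE()`/`WRITE_ONCE()` (not
the kernel's exact definitions, which also handle odd sizes and instrumentation):
the volatile cast forces a single real access that the compiler may not merge,
refetch, or tear.

#include <stdio.h>

/* Simplified stand-ins for the kernel macros: force one volatile access. */
#define READ_ONCE(x)      (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, v)  (*(volatile typeof(x) *)&(x) = (v))

static unsigned long shared;

int main(void)
{
    WRITE_ONCE(shared, 42);
    unsigned long snap = READ_ONCE(shared);   /* one load, no compiler caching */
    printf("%lu\n", snap);
    return 0;
}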
---#
arch/x86/kernel/process.h:26
---
/*
* This needs to be inline to optimize for the common case where no extra
* work needs to be done.
*/
static inline void switch_to_extra(struct task_struct *prev,
struct task_struct *next)
{
unsigned long next_tif = task_thread_info(next)->flags;
unsigned long prev_tif = task_thread_info(prev)->flags;
if (IS_ENABLED(CONFIG_SMP)) {
/*
* Avoid __switch_to_xtra() invocation when conditional
* STIBP is disabled and the only different bit is
* TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
* in the TIF_WORK_CTXSW masks.
*/
if (!static_branch_likely(&switch_to_cond_stibp)) { ## x ##
prev_tif &= ~_TIF_SPEC_IB;
next_tif &= ~_TIF_SPEC_IB;
}
}
/*
* __switch_to_xtra() handles debug registers, i/o bitmaps,
* speculation mitigations etc.
*/
if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
prev_tif & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev, next);
}
---
`static_branch_likely()` checks whether a static branch (static key) is enabled.
If conditional STIBP is disabled, the `_TIF_SPEC_IB` (Indirect Branch Speculation)
bit is masked out of both `prev_tif` and `next_tif`, so a difference in that bit
alone does not force a call to `__switch_to_xtra()`.
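A tiny stand-alone illustration of that masking (flag values below are made up,
not the real TIF bit assignments): once the bit is cleared from both words, two
flag words that differed only in that bit no longer trigger the extra work.

#include <stdio.h>

/* Hypothetical flag bits for illustration only. */
#define _TIF_SPEC_IB     (1UL << 9)
#define _TIF_WORK_MASK   (_TIF_SPEC_IB | (1UL << 3))  /* pretend i/o-bitmap bit */

int main(void)
{
    unsigned long prev_tif = _TIF_SPEC_IB;   /* differ only in the STIBP bit */
    unsigned long next_tif = 0;

    /* conditional STIBP disabled: ignore that bit entirely */
    prev_tif &= ~_TIF_SPEC_IB;
    next_tif &= ~_TIF_SPEC_IB;

    if ((prev_tif & _TIF_WORK_MASK) || (next_tif & _TIF_WORK_MASK))
        printf("__switch_to_xtra() would run\n");
    else
        printf("fast path: no extra work\n");
    return 0;
}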
---#
arch/x86/kernel/process_64.c:213
---
/*
* Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
* not available. The goal is to be reasonably fast on non-FSGSBASE systems.
* It's forcibly inlined because it'll generate better code and this function
* is hot.
*/
static __always_inline void save_base_legacy(struct task_struct *prev_p,
unsigned short selector,
enum which_selector which)
{
if (likely(selector == 0)) { ## x ##
/*
* On Intel (without X86_BUG_NULL_SEG), the segment base could
* be the pre-existing saved base or it could be zero. On AMD
* (with X86_BUG_NULL_SEG), the segment base could be almost
* anything.
*
* This branch is very hot (it's hit twice on almost every
* context switch between 64-bit programs), and avoiding
* the RDMSR helps a lot, so we just assume that whatever
* value is already saved is correct. This matches historical
* Linux behavior, so it won't break existing applications.
*
* To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
* report that the base is zero, it needs to actually be zero:
* see the corresponding logic in load_seg_legacy.
*/
} else {
/*
* If the selector is 1, 2, or 3, then the base is zero on
* !X86_BUG_NULL_SEG CPUs and could be anything on
* X86_BUG_NULL_SEG CPUs. In the latter case, Linux
* has never attempted to preserve the base across context
* switches.
*
* If selector > 3, then it refers to a real segment, and
* saving the base isn't necessary.
*/
if (which == FS)
prev_p->thread.fsbase = 0;
else
prev_p->thread.gsbase = 0;
}
}
---
This is related to branch prediction and saving the FS/GS base registers.
If `selector` is 0 (the `likely()` hot path), nothing happens and whatever base was
previously saved is assumed correct; otherwise `fsbase` or `gsbase` is reset to 0.
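`likely()`/`unlikely()` are essentially wrappers around GCC's `__builtin_expect()`;
a minimal user-space sketch of the hint used in the `likely(selector == 0)` test
(the hint only influences code layout, both paths still behave the same):

#include <stdio.h>

#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

static void save_base(unsigned short selector)
{
    if (likely(selector == 0)) {
        /* hot, predicted-taken path: keep the previously saved base */
        printf("selector 0: keep saved base\n");
    } else {
        /* cold path */
        printf("selector %u: reset base to 0\n", selector);
    }
}

int main(void)
{
    save_base(0);
    save_base(3);
    return 0;
}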
---#
arch/x86/kernel/process_64.c:629
---
/*
* switch_to(x,y) should switch tasks from x to y.
*
* This could still be optimized:
* - fold all the options into a flag word and test it with a single test.
* - could test fs/gs bitsliced
*
* Kprobes not supported here. Set the probe on schedule instead.
* Function graph tracer not supported too.
*/
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
# ...
/*
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
switch_fpu_finish(next_fpu);
/* Reload sp0. */
update_task_stack(next_p);
switch_to_extra(prev_p, next_p);
if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
/*
* AMD CPUs have a misfeature: SYSRET sets the SS selector but
* does not update the cached descriptor. As a result, if we
* do SYSRET while SS is NULL, we'll end up in user mode with
* SS apparently equal to __USER_DS but actually unusable.
*
* The straightforward workaround would be to fix it up just
* before SYSRET, but that would slow down the system call
* fast paths. Instead, we ensure that SS is never NULL in
* system call context. We do this by replacing NULL SS
* selectors at every context switch. SYSCALL sets up a valid
* SS, so the only way to get NULL is to re-enter the kernel
* from CPL 3 through an interrupt. Since that can't happen
* in the same task as a running syscall, we are guaranteed to
* context switch between every interrupt vector entry and a
* subsequent SYSRET.
*
* We read SS first because SS reads are much faster than
* writes. Out of caution, we force SS to __KERNEL_DS even if
* it previously had a different non-NULL value.
*/
unsigned short ss_sel;
savesegment(ss, ss_sel);
if (ss_sel != __KERNEL_DS) ## x ##
loadsegment(ss, __KERNEL_DS);
}
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in();
return prev_p;
}
---
`__switch_to` is the function that switches from one task to another. This line is
part of the workaround for a known AMD misfeature: SYSRET does not refresh the
cached SS descriptor. SS reads are much faster than writes, so SS is read first
and, out of caution, forced to __KERNEL_DS whenever it holds any other value.
---#
arch/x86/kernel/signal.c:91
---
static int restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *usc,
unsigned long uc_flags)
{
struct sigcontext sc;
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE)) ## x ##
return -EFAULT;
#...
/* Get CS/SS and force CPL3 */
regs->cs = sc.cs | 0x03;
regs->ss = sc.ss | 0x03;
regs->flags = (regs->flags & ~FIX_EFLAGS) | (sc.flags & FIX_EFLAGS);
/* disable syscall checks */
regs->orig_ax = -1;
#ifdef CONFIG_X86_64
/*
* Fix up SS if needed for the benefit of old DOSEMU and
* CRIU.
*/
if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
force_valid_ss(regs);
#endif
return fpu__restore_sig((void __user *)sc.fpstate,
IS_ENABLED(CONFIG_X86_32));
}
---
`restore_sigcontext` is the function that restores register state from a signal
frame. `copy_from_user` copies `CONTEXT_COPY_SIZE` bytes of the user-space
`struct sigcontext` into the kernel; if the copy fails, -EFAULT is returned.
---#
arch/x86/kernel/signal.c:469
---
static int __setup_rt_frame(int sig, struct ksignal *ksig,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
unsigned long uc_flags;
/* x86-64 should always use SA_RESTORER. */
if (!(ksig->ka.sa.sa_flags & SA_RESTORER))
return -EFAULT;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
/* Create the ucontext. */
unsafe_put_user(uc_flags, &frame->uc.uc_flags, Efault);
unsafe_put_user(0, &frame->uc.uc_link, Efault);
unsafe_save_altstack(&frame->uc.uc_stack, regs->sp, Efault);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
unsafe_put_user(ksig->ka.sa.sa_restorer, &frame->pretcode, Efault);
unsafe_put_sigcontext(&frame->uc.uc_mcontext, fp, regs, set, Efault);
unsafe_put_sigmask(set, frame, Efault);
user_access_end();
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
if (copy_siginfo_to_user(&frame->info, &ksig->info)) ## x ##
return -EFAULT;
}
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
regs->ax = 0;
/* This also works for non SA_SIGINFO handlers because they expect the
next argument after the signal number on the stack. */
regs->si = (unsigned long)&frame->info;
regs->dx = (unsigned long)&frame->uc;
regs->ip = (unsigned long) ksig->ka.sa.sa_handler;
regs->sp = (unsigned long)frame;
/*
* Set up the CS and SS registers to run signal handlers in
* 64-bit mode, even if the handler happens to be interrupting
* 32-bit or 16-bit code.
*
* SS is subtle. In 64-bit mode, we don't need any particular
* SS descriptor, but we do need SS to be valid. It's possible
* that the old SS is entirely bogus -- this can happen if the
* signal we're trying to deliver is #GP or #SS caused by a bad
* SS value. We also have a compatbility issue here: DOSEMU
* relies on the contents of the SS register indicating the
* SS value at the time of the signal, even though that code in
* DOSEMU predates sigreturn's ability to restore SS. (DOSEMU
* avoids relying on sigreturn to restore SS; instead it uses
* a trampoline.) So we do our best: if the old SS was valid,
* we keep it. Otherwise we replace it.
*/
regs->cs = __USER_CS;
if (unlikely(regs->ss != __USER_DS))
force_valid_ss(regs);
return 0;
Efault:
user_access_end();
return -EFAULT;
}
---
`__setup_rt_frame` is the function that sets up the signal frame on the user stack.
`copy_siginfo_to_user` copies the kernel's siginfo into the user-space frame; if
that copy fails, `__setup_rt_frame` returns -EFAULT.
---#
arch/x86/lib/insn.c:156
---
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
*
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
* to point to the (first) opcode. No effect if @insn->prefixes.got
* is already set.
*/
void insn_get_prefixes(struct insn *insn)
{
struct insn_field *prefixes = &insn->prefixes;
insn_attr_t attr;
insn_byte_t b, lb;
int i, nb;
if (prefixes->got)
return;
# ...
/* Decode REX prefix */
if (insn->x86_64) { ## x ##
b = peek_next(insn_byte_t, insn);
attr = inat_get_opcode_attribute(b);
if (inat_is_rex_prefix(attr)) {
insn->rex_prefix.value = b;
insn->rex_prefix.nbytes = 1;
insn->next_byte++;
if (X86_REX_W(b))
/* REX.W overrides opnd_size */
insn->opnd_bytes = 8;
}
}
insn->rex_prefix.got = 1;
# ...
vex_end:
insn->vex_prefix.got = 1;
prefixes->got = 1;
err_out:
return;
}
---
`insn_get_prefixes` is the function that decodes the prefix bytes of an instruction.
If `insn->x86_64` is true, it also tries to decode a REX prefix:
`inat_get_opcode_attribute` returns the attribute entry for the next byte,
`inat_is_rex_prefix` checks whether that attribute marks a REX prefix, and
`X86_REX_W` tests the REX.W bit. If REX.W is set, `insn->opnd_bytes = 8` overrides
the operand size to 64 bits.
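A hedged stand-alone sketch of just the REX check (the real decoder goes through
its inat attribute tables instead of open-coding the ranges): in 64-bit mode a REX
prefix is any byte 0x40-0x4F, and bit 3 of it (REX.W) selects a 64-bit operand size.

#include <stdio.h>
#include <stdbool.h>

/* REX prefixes occupy 0x40..0x4F in 64-bit mode; bit 3 is REX.W. */
static bool is_rex_prefix(unsigned char b) { return (b & 0xf0) == 0x40; }
static bool rex_w(unsigned char b)         { return b & 0x08; }

int main(void)
{
    /* 48 89 c3 = mov rbx, rax (REX.W); without the prefix, 89 c3 = mov ebx, eax */
    unsigned char insn[] = { 0x48, 0x89, 0xc3 };
    int opnd_bytes = 4;

    if (is_rex_prefix(insn[0]) && rex_w(insn[0]))
        opnd_bytes = 8;   /* REX.W overrides the default operand size */

    printf("operand size: %d bytes\n", opnd_bytes);
    return 0;
}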
---#
arch/x86/mm/fault.c:1101
---
static inline int
access_error(unsigned long error_code, struct vm_area_struct *vma)
{
/* This is only called for the current mm, so: */
bool foreign = false;
/*
* Read or write was blocked by protection keys. This is
* always an unconditional error and can never result in
* a follow-up action to resolve the fault, like a COW.
*/
if (error_code & X86_PF_PK) ## x ##
return 1;
# ...
return 0;
}
---
`access_error` is the function that checks if the access is allowed. If the
access is not allowed, `access_error` returns 1. `error_code` is the error
code of the fault. `X86_PF_PK` is the bit that indicates if the access is
blocked by protection keys.
---#
arch/x86/mm/fault.c:1121
---
static inline int
access_error(unsigned long error_code, struct vm_area_struct *vma)
{
/* This is only called for the current mm, so: */
bool foreign = false;
# ...
/* read, present: */
if (unlikely(error_code & X86_PF_PROT))
return 1;
/* read, not present: */
if (unlikely(!vma_is_accessible(vma)))
return 1;
return 0;
}
---
`access_error` is the function that checks if the access is allowed. If the
access is not allowed, `access_error` returns 1. `error_code` is the hardware
page-fault error code. `X86_PF_PROT` indicates the fault occurred on a present page
(a protection violation), so a read fault with this bit set is treated as an error.
---#
arch/x86/mm/fault.c:1131
---
bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
* TASK_SIZE_MAX, but is not considered part of the kernel
* address space.
*/
if (IS_ENABLED(CONFIG_X86_64) && is_vsyscall_vaddr(address)) ## x ##
return false;
return address >= TASK_SIZE_MAX;
}
---
`fault_in_kernel_space` is the function that checks if the faulting address lies in
kernel space. `TASK_SIZE_MAX` is the upper bound of the user address space. On
64-bit, `is_vsyscall_vaddr` checks for the legacy vsyscall page, which sits above
`TASK_SIZE_MAX` but is user-accessible, so the function returns false for it.
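A stand-alone sketch of the check, assuming simplified constants (the real
`TASK_SIZE_MAX` depends on the paging mode; `VSYSCALL_ADDR` is the fixed legacy
vsyscall page on x86-64):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative constants; the kernel derives TASK_SIZE_MAX from the paging mode. */
#define TASK_SIZE_MAX  0x00007ffffffff000ULL
#define VSYSCALL_ADDR  0xffffffffff600000ULL
#define PAGE_MASK      (~0xfffULL)

static bool is_vsyscall_vaddr(uint64_t addr)
{
    return (addr & PAGE_MASK) == VSYSCALL_ADDR;
}

static bool fault_in_kernel_space(uint64_t address)
{
    if (is_vsyscall_vaddr(address))
        return false;             /* user-visible despite its high address */
    return address >= TASK_SIZE_MAX;
}

int main(void)
{
    printf("%d\n", fault_in_kernel_space(0x00007f0000000000ULL)); /* 0: user */
    printf("%d\n", fault_in_kernel_space(VSYSCALL_ADDR + 0x400)); /* 0: vsyscall */
    printf("%d\n", fault_in_kernel_space(0xffffffff81000000ULL)); /* 1: kernel */
    return 0;
}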
---#
arch/x86/mm/fault.c:1340
---
/* Handle faults in the user portion of the address space */
static inline
void do_user_addr_fault(struct pt_regs *regs,
unsigned long hw_error_code,
unsigned long address)
{
struct vm_area_struct *vma;
struct task_struct *tsk;
struct mm_struct *mm;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
tsk = current;
mm = tsk->mm;
# ...
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ## x ##
bad_area(regs, hw_error_code, address);
return;
}
# ...
check_v8086_mode(regs, address, tsk);
}
---
`do_user_addr_fault` is the function that handles faults in the user portion of the
address space. `VM_GROWSDOWN` marks a VMA that may grow downward (a stack). If the
VMA cannot grow down to cover the faulting address, `bad_area` is called.
---#
arch/x86/mm/pat/memtype.c:1085
---
/*
* untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
* can be for the entire vma (in which case pfn, size are zero).
*/
void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size)
{
resource_size_t paddr;
unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT)) ## x ##
return;
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
WARN_ON_ONCE(1);
return;
}
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
if (vma)
vma->vm_flags &= ~VM_PAT;
}
---
`untrack_pfn` is the function that untracks a pfn (page frame number) range.
`VM_PAT` is the flag that indicates the VMA has a PAT-tracked pfn range. If the VMA
is not tracked, `untrack_pfn` returns early.
---#
arch/x86/mm/tlb.c:559
---
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
unsigned cpu = smp_processor_id();
u64 next_tlb_gen;
bool need_flush;
u16 new_asid;
# ...
if (need_flush) { ## x ##
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
/* Make sure we write CR3 before loaded_mm. */
barrier();
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
if (next != real_prev) {
cr4_update_pce_mm(next);
switch_ldt(real_prev, next);
}
}
---
`switch_mm_irqs_off` is the function that switches the mm. `need_flush` is the
variable that indicates if the TLB needs to be flushed. If `need_flush` is
true, the TLB is flushed.
---#
arch/x86/mm/tlb.c:598
---
/*
* Please ignore the name of this function. It should be called
* switch_to_kernel_thread().
*
* enter_lazy_tlb() is a hint from the scheduler that we are entering a
* kernel thread or other context without an mm. Acceptable implementations
* include doing nothing whatsoever, switching to init_mm, or various clever
* lazy tricks to try to minimize TLB flushes.
*
* The scheduler reserves the right to call enter_lazy_tlb() several times
* in a row. It will notify us that we're going back to a real mm by
* calling switch_mm_irqs_off().
*/
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) ## x ##
return;
this_cpu_write(cpu_tlbstate.is_lazy, true);
}
---
This function is called when the scheduler switches to a kernel thread or another
context without an mm. If the currently loaded mm is already `init_mm`, the function
just returns; otherwise `cpu_tlbstate.is_lazy` is set to true.
---#
arch/x86/mm/tlb.c:818
---
STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
if (info->end == TLB_FLUSH_ALL)
trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
else
trace_tlb_flush(TLB_REMOTE_SEND_IPI,
(info->end - info->start) >> PAGE_SHIFT);
/*
* If no page tables were freed, we can skip sending IPIs to
* CPUs in lazy TLB mode. They will flush the CPU themselves
* at the next context switch.
*
* However, if page tables are getting freed, we need to send the
* IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
if (info->freed_tables) ## x ##
smp_call_function_many(cpumask, flush_tlb_func_remote,
(void *)info, 1);
else
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
(void *)info, 1, cpumask);
}
---
`native_flush_tlb_others` is the function that flushes the TLB of other CPUs.
If `info->freed_tables` is true, the TLB of all CPUs is flushed. Else, the
TLB of CPUs that are not in lazy mode is flushed.
---#
block/bio.c:225
---
struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
mempool_t *pool)
{
struct bio_vec *bvl;
# ...
/*
* Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
* is set, retry with the 1-entry mempool
*/
bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) { ## x ##
*idx = BVEC_POOL_MAX;
goto fallback;
}
}
(*idx)++;
return bvl;
}
---
`bvec_alloc` is the function that allocates the array of bio_vecs for a bio (each
bio_vec describes one page segment: page, offset, length).
ref:http://books.gigatux.nl/mirror/kerneldevelopment/0672327201/ch13lev1sec3.html
If the slab allocation fails and `__GFP_DIRECT_RECLAIM` is set, the allocation is
retried from the 1-entry mempool.
---#
block/bio.c:503
---
/**
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_* mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
* @bs: the bio_set to allocate from.
*
* ...
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
struct bio_set *bs)
{
gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
struct bio_vec *bvl = NULL;
struct bio *bio;
void *p;
# ...
if (nr_iovecs > inline_vecs) { ## x ##
unsigned long idx = 0;
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
gfp_mask = saved_gfp;
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
}
if (unlikely(!bvl))
goto err_free;
bio->bi_flags |= idx << BVEC_POOL_OFFSET;
} else if (nr_iovecs) {
bvl = bio->bi_inline_vecs;
}
bio->bi_pool = bs;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
return bio;
err_free:
mempool_free(p, &bs->bio_pool);
return NULL;
}
---
`bio_alloc_bioset` is the function that allocates a bio from a bio_set. If
`nr_iovecs` is greater than `inline_vecs`, the bio_vec array is allocated with
`bvec_alloc`; if that fails under a restricted gfp_mask, pending bios are punted to
the rescuer and the allocation is retried with the original mask. Else if
`nr_iovecs` is not 0, `bvl` points at the bio's inline vectors
(`bio->bi_inline_vecs`).
---#
block/bio.c:880
---
/**
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
* @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
* page size.
*
* Warn if (@len, @off) crosses pages in case that @same_page is true.
*
* Return %true on success or %false on failure.
*/
bool __bio_try_merge_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
if (bio->bi_iter.bi_size > UINT_MAX - len) { ## x ##
*same_page = false;
return false;
}
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
}
return false;
}
---
`__bio_try_merge_page` is the function that tries to append data to an existing
bvec. If `bio->bi_vcnt` is greater than 0, the last bvec of `bio` is retrieved and,
if the page is mergeable with it, the merge is refused when `bio->bi_iter.bi_size`
would exceed `UINT_MAX - len` (an overflow guard); otherwise the bvec length and
the bio size are both increased by `len`.
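The `bi_size > UINT_MAX - len` test is the usual idiom for refusing an unsigned
addition that would wrap around; a small stand-alone sketch:

#include <stdio.h>
#include <stdbool.h>
#include <limits.h>

/* Return true only if size + len fits in an unsigned int without wrapping. */
static bool add_would_fit(unsigned int size, unsigned int len)
{
    return size <= UINT_MAX - len;
}

int main(void)
{
    printf("%d\n", add_would_fit(100, 200));          /* 1: fine */
    printf("%d\n", add_would_fit(UINT_MAX - 5, 10));  /* 0: would wrap */
    return 0;
}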
---#
block/bio.c:918
---
/**
* __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add, may cross pages
* @off: offset of the data relative to @page, may cross pages
*
* Add the data at @page + @off to @bio as a new bvec. The caller must ensure
* that @bio has space for another bvec.
*/
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off)
{
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
WARN_ON_ONCE(bio_full(bio, len));
bv->bv_page = page;
bv->bv_offset = off;
bv->bv_len = len;
bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page))) ## x ##
bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);
---
`__bio_add_page` is the function that adds a page to a bio in a new segment. The
caller must ensure that the bio has space for another bvec. The page is added
to the bio and the size of the bio is increased by `len`. If the page is in the
workingset, the bio is flagged as being in the workingset (a workingset is a
set of pages that are frequently accessed).
---#
block/blk-core.c:832
---
static noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
struct request_queue *q = bio->bi_disk->queue;
blk_status_t status = BLK_STS_IOERR;
struct blk_plug *plug;
might_sleep();
plug = blk_mq_plug(q, bio);
if (plug && plug->nowait)
bio->bi_opf |= REQ_NOWAIT;
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue does not support NOWAIT.
*/
if ((bio->bi_opf & REQ_NOWAIT) && !blk_queue_nowait(q))
goto not_supported;
if (should_fail_bio(bio))
goto end_io;
if (bio->bi_partno) {
if (unlikely(blk_partition_remap(bio)))
goto end_io;
} else {
if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) ## x ##
goto end_io;
if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
goto end_io;
}
# ...
not_supported:
status = BLK_STS_NOTSUPP;
end_io:
bio->bi_status = status;
bio_endio(bio);
return false;
}
---
`submit_bio_checks` is the function that checks if a bio can be submitted. For the
whole-disk case, `bio_check_ro` fails writes aimed at a read-only device; in that
case the bio is ended and `false` is returned.
---#
block/blk-core.c:1269
---
static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) { ## x ##
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
if (part->partno) {
part = &part_to_disk(part)->part0;
goto again;
}
}
---
`update_io_ticks` is the function that updates the io ticks of a partition.
`part->stamp` is the jiffies timestamp of the last update. If `part->stamp` differs
from `now`, a `cmpxchg` tries to advance it to `now`; the caller that wins the race
adds `now - stamp` to `io_ticks` when `end` is true, or 1 otherwise.
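`cmpxchg()` only stores the new stamp if it still holds the value read above, so of
several concurrent callers exactly one accounts the elapsed ticks. A user-space
sketch of the same pattern with C11 atomics (names reused for illustration, this is
not the kernel code):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long stamp;
static unsigned long io_ticks;

/* Account elapsed ticks once per timestamp change, even with concurrent callers. */
static void update_io_ticks(unsigned long now, int end)
{
    unsigned long old = atomic_load(&stamp);

    if (old != now) {
        /* Succeeds for exactly one caller that saw the stale stamp. */
        if (atomic_compare_exchange_strong(&stamp, &old, now))
            io_ticks += end ? now - old : 1;
    }
}

int main(void)
{
    update_io_ticks(100, 0);
    update_io_ticks(105, 1);
    printf("io_ticks = %lu\n", io_ticks);
    return 0;
}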
---#
block/blk-core.c:1272
---
static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{
unsigned long stamp;
again:
stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
__part_stat_add(part, io_ticks, end ? now - stamp : 1);
}
if (part->partno) { ## x ##
part = &part_to_disk(part)->part0;
goto again;
}
}
---
`update_io_ticks` is the function that updates the io ticks of a partition.
`part->partno` is the partition number. If it is not 0, `part` is a partition, so
the whole-disk `part0` is retrieved and the loop repeats to account the ticks at
the disk level as well.
---#
block/blk-core.c:1775
---
/**
* blk_finish_plug - mark the end of a batch of submitted I/O
* @plug: The &struct blk_plug passed to blk_start_plug()
*
* Description:
* Indicate that a batch of I/O submissions is complete. This function
* must be paired with an initial call to blk_start_plug(). The intent
* is to allow the block layer to optimize I/O submission. See the
* documentation for blk_start_plug() for more information.
*/
void blk_finish_plug(struct blk_plug *plug)
{
if (plug != current->plug) ## x ##
return;
blk_flush_plug_list(plug, false);
current->plug = NULL;
}
---
`blk_finish_plug` is the function that marks the end of a batch of submitted
io. If `plug` is not the current plug, the function returns. Else the plug list
is flushed and the current plug is set to `NULL`.
---#
block/blk-mq-sched.c:341
---
/*
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
* its queue by itself in its completion handler, so we don't need to
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
*
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
* be run again. This is necessary to avoid starving flushes.
*/
static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
bool multi_hctxs = false, run_queue = false;
bool dispatched = false, busy = false;
unsigned int max_dispatch;
LIST_HEAD(rq_list);
int count = 0;
# ...
if (busy) ## x ##
return -EAGAIN;
return !!dispatched;
}
---
`__blk_mq_do_dispatch_sched` is the function that dispatches requests from the
scheduler. If the hctx (hardware context) was found busy, -EAGAIN is returned so
the dispatch is rerun. Otherwise `!!dispatched` is returned: 1 if anything was
dispatched, 0 if not.
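The `!!dispatched` idiom normalizes any non-zero value to 1, so the return value is
1 when at least one request was dispatched and 0 otherwise:

#include <stdio.h>

int main(void)
{
    int dispatched = 7;
    printf("%d\n", !!dispatched);  /* 1 */
    printf("%d\n", !!0);           /* 0 */
    return 0;
}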
---#
block/blk-mq.c:605
---
static void blk_mq_trigger_softirq(struct request *rq)
{
struct list_head *list;
unsigned long flags;
local_irq_save(flags);
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&rq->ipi_list, list);
/*
* If the list only contains our just added request, signal a raise of
* the softirq. If there are already entries there, someone already
* raised the irq but it hasn't run yet.
*/
if (list->next == &rq->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ); ## x ##
local_irq_restore(flags);
}
---
`blk_mq_trigger_softirq` is the function that queues a completed request on the
per-CPU `blk_cpu_done` list and triggers the softirq.
If `list->next == &rq->ipi_list`, the just-added request is the only entry on the
list, so BLOCK_SOFTIRQ is raised. Else, someone already raised it and it simply
hasn't run yet.
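The `list->next == &rq->ipi_list` test works because the per-CPU done list is a
circular doubly-linked list: immediately after adding the first entry to an empty
list, the head's `next` pointer is that entry. A minimal user-space sketch of that
property (simplified list, not the kernel's full list.h):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
    new->prev = head->prev;
    new->next = head;
    head->prev->next = new;
    head->prev = new;
}

int main(void)
{
    struct list_head done, req_a, req_b;

    INIT_LIST_HEAD(&done);
    list_add_tail(&req_a, &done);
    printf("a is only entry? %s\n", done.next == &req_a ? "yes" : "no");  /* yes */

    list_add_tail(&req_b, &done);
    printf("b is only entry? %s\n", done.next == &req_b ? "yes" : "no");  /* no */
    return 0;
}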
---#
block/bounce.c:377
---
void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
{
mempool_t *pool;
/*
* Data-less bio, nothing to bounce
*/
if (!bio_has_data(*bio_orig))
return;
/*
* for non-isa bounce case, just check if the bounce pfn is equal
* to or bigger than the highest pfn in the system -- in that case,
* don't waste time iterating over bio segments
*/
if (!(q->bounce_gfp & GFP_DMA)) {
if (q->limits.bounce_pfn >= blk_max_pfn) ## x ##
return;
pool = &page_pool;
} else {
BUG_ON(!mempool_initialized(&isa_page_pool));
pool = &isa_page_pool;
}
/*
* slow path
*/
__blk_queue_bounce(q, bio_orig, pool);
}
---
`blk_queue_bounce` is the function that bounces a bio. If the bounce pfn is
equal to or bigger than the highest pfn in the system, the function returns.
---#
fs/exec.c:449
---
/**
* count() counts the number of strings in array ARGV.
*/
static int count(struct user_arg_ptr argv, int max)
{
int i = 0;
if (argv.ptr.native != NULL) {
for (;;) {
const char __user *p = get_user_arg_ptr(argv, i);
if (!p)
break;
if (IS_ERR(p))
return -EFAULT;
if (i >= max)
return -E2BIG;
++i;
if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
cond_resched(); ## x ##
}
}
return i;
}
---
`count` is the function that counts the number of strings in an array.
`p` is the string at index `i` in the array.
`cond_resched` is called so the scheduler can run another task if needed while a
large argv is being walked.
---#
fs/exec.c:1022
---
/*
* Maps the mm_struct mm into the current task struct.
* On success, this function returns with the mutex
* exec_update_mutex locked.
*/
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
struct mm_struct *old_mm, *active_mm;
int ret;
# ...
if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
tsk->mm->vmacache_seqnum = 0; ## x ##
vmacache_flush(tsk);
task_unlock(tsk);
if (old_mm) {
mmap_read_unlock(old_mm);
BUG_ON(active_mm != old_mm);
setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
mm_update_next_owner(old_mm);
mmput(old_mm);
return 0;
}
mmdrop(active_mm);
return 0;
}
---
`exec_mmap` is the function that maps the mm_struct `mm` into the current task
struct. Interrupts are re-enabled either before or after `activate_mm`, depending
on `CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM`. Resetting `vmacache_seqnum` and calling
`vmacache_flush` then invalidates the task's per-thread VMA cache so stale entries
from the old mm cannot be reused.
---#
fs/exec.c:1850
---
/*
* sys_execve() executes a new program.
*/
static int bprm_execve(struct linux_binprm *bprm,
int fd, struct filename *filename, int flags)
{
struct file *file;
struct files_struct *displaced;
int retval;
/*
* Cancel any io_uring activity across execve
*/
io_uring_task_cancel();
retval = unshare_files(&displaced);
if (retval)
return retval;
# ...
out_files:
if (displaced)
reset_files_struct(displaced); ## x ##
return retval;
}
---
`bprm_execve` is the function that executes a new program.
`displaced` is the old files struct that was unshared at the start of execve.
On the error path, `reset_files_struct` is called to restore it.
---#
---#
source file : line number
---
code line a
code line b ## x ## <--------- marks the specific line noted above
code line c
---
description of the code
an example:
---#
arch/x86/kernel/process.c:119
---
static int set_new_tls(struct task_struct *p, unsigned long tls)
{
struct user_desc __user *utls = (struct user_desc __user *)tls;
if (in_ia32_syscall()) ## x ##
return do_set_thread_area(p, -1, utls, 0);
else
return do_set_thread_area_64(p, ARCH_SET_FS, tls);
}
---
`in_ia32_syscall()` checks whether the current process is in a 32-bit syscall.
If so, `do_set_thread_area()` is used to set the `tls` (Thread Local Storage).
Otherwise, `do_set_thread_area_64()` is used to set the `tls`.
---#
fs/exec.c:196
---
/*
* The nascent bprm->mm is not visible until exec_mmap() but it can
* use a lot of memory, account these pages in current->mm temporary
* for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
* change the counter back via acct_arg_size(0).
*/
static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
struct mm_struct *mm = current->mm;
long diff = (long)(pages - bprm->vma_pages);
if (!mm || !diff)
return;
bprm->vma_pages = pages; ## x ##
add_mm_counter(mm, MM_ANONPAGES, diff);
}
---
`acct_arg_size` is the function that accounts the argument pages of the nascent
bprm against the current mm. If `mm` is NULL or `diff` is zero, the function
returns before touching the counter; otherwise `add_mm_counter` adds `diff` to the
MM_ANONPAGES counter.
---#
block/bio.c:258
---
static void bio_free(struct bio *bio)
{
struct bio_set *bs = bio->bi_pool;
void *p;
bio_uninit(bio);
if (bs) {
bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio)); ## x ##
/*
* If we have front padding, adjust the bio pointer before freeing
*/
p = bio;
p -= bs->front_pad;
mempool_free(p, &bs->bio_pool);
} else {
/* Bio was allocated by bio_kmalloc() */
kfree(bio);
}
}
---
`bio_free` is the function that frees the bio.
`bs` is the bio_set that the bio belongs to. if `bs` is null, then the bio was
allocated by `bio_kmalloc` and is freed by `kfree`. Otherwise, the bio is freed
by `mempool_free`.
---#
block/bio.c:876
---
/**
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
* @page: start page to add
* @len: length of the data to add
* @off: offset of the data relative to @page
* @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* useful optimisation for file systems with a block size smaller than the
* page size.
*
* Warn if (@len, @off) crosses pages in case that @same_page is true.
*
* Return %true on success or %false on failure.
*/
bool __bio_try_merge_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
if (bio->bi_vcnt > 0) { ## x ##
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
if (bio->bi_iter.bi_size > UINT_MAX - len) {
*same_page = false;
return false;
}
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
}
}
return false;
}
---
`__bio_try_merge_page` is the function that tries to merge the page to the bio.
`bio->bi_vcnt` is the number of bio_vecs in the bio. if `bio->bi_vcnt` is
greater than 0, then the bio has at least one bio_vec. `bv` is the last bio_vec
in the bio.
---#
mm/memory.c:488
---
static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
{
int i;
if (current->mm == mm)
sync_mm_rss(mm); ## x ##
for (i = 0; i < NR_MM_COUNTERS; i++)
if (rss[i])
add_mm_counter(mm, i, rss[i]);
}
---
`add_mm_rss_vec` is the function that adds cached rss deltas (Resident Set Size:
the portion of a process's memory that is resident in RAM) to the mm counters. If
the current task owns `mm`, `sync_mm_rss` first flushes the task's own cached rss
counts into it.
---#
mm/memory.c:1444
---
static void unmap_single_vma(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr,
struct zap_details *details)
{
unsigned long start = max(vma->vm_start, start_addr);
unsigned long end;
if (start >= vma->vm_end)
return; ## x ##
# ...
}
---
`unmap_single_vma` is the function that unmaps a single vma (virtual memory area)
from the tlb (Translation Lookaside Buffer).
`start` is the maximum of the vma start and the start address. if `start` is
greater than or equal to the vma end, then the function returns.
---#

How do I filter the trace down to just the application?

  • The longest-running process in the QEMU instance is the application I'm tracing.
  • So the CR3 value with the largest count corresponds to the application (a small counting sketch follows the listings below).
all pc
      1 CR3=0000000000000000
      1 CR3=0000000002d58000
      1 CR3=0000000100150000
      2 CR3=0000000101572000
      3 CR3=00000001015ba000
      6 CR3=000000010152a000
     11 CR3=000000000331f000
     27 CR3=000000010157e000
     28 CR3=0000000101608000
     93 CR3=00000001015fe000
    181 CR3=0000000101570000
    250 CR3=00000001014ea000
   1812 CR3=000000000260c000
 188926 CR3=000000010157c000 <-------- interested application

kernel pc
      1 CR3=0000000002d58000
      1 CR3=0000000100150000
      2 CR3=0000000101572000
      3 CR3=00000001015ba000
      4 CR3=000000010152a000
     18 CR3=000000010157e000
     19 CR3=00000001015fe000
     27 CR3=0000000101608000
     45 CR3=0000000101570000
     94 CR3=000000010157c000 <-------- interested application
    155 CR3=00000001014ea000
   1812 CR3=000000000260c000

user pc
      1 CR3=0000000000000000
      1 CR3=0000000101608000
      2 CR3=000000010152a000
      9 CR3=000000010157e000
     11 CR3=000000000331f000
     74 CR3=00000001015fe000
     95 CR3=00000001014ea000
    136 CR3=0000000101570000
 188832 CR3=000000010157c000 <-------- interested application
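
A minimal sketch of how counts like the ones above can be produced, assuming the
trace lines carry tokens of the form CR3=000000010157c000 (the token format and
reading from stdin are assumptions about the trace output, not part of it):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Count occurrences of each distinct CR3=... token on stdin and print them
 * sorted by count; the entry with the largest count is the traced application. */
#define MAX_CR3 1024

struct cr3_count { char cr3[32]; unsigned long count; };

static struct cr3_count table[MAX_CR3];
static int nr;

static int cmp(const void *a, const void *b)
{
    const struct cr3_count *x = a, *y = b;
    return (x->count > y->count) - (x->count < y->count);
}

int main(void)
{
    char line[1024];

    while (fgets(line, sizeof(line), stdin)) {
        char *p = strstr(line, "CR3=");
        if (!p)
            continue;

        char val[32];
        if (sscanf(p, "CR3=%31[0-9a-fA-F]", val) != 1)
            continue;

        int i;
        for (i = 0; i < nr; i++)
            if (!strcmp(table[i].cr3, val))
                break;
        if (i == nr) {
            if (nr == MAX_CR3)
                continue;
            strcpy(table[nr].cr3, val);
            table[nr].count = 0;
            nr++;
        }
        table[i].count++;
    }

    qsort(table, nr, sizeof(table[0]), cmp);
    for (int i = 0; i < nr; i++)
        printf("%7lu CR3=%s\n", table[i].count, table[i].cr3);
    return 0;
}

Piping the raw trace through this prints one line per CR3 value, smallest count
first; the last line is the traced application.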