Skip to content

Instantly share code, notes, and snippets.

@pzb
Created December 22, 2018 00:03
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save pzb/95ab0846a52e8721de1a52f743c2cb88 to your computer and use it in GitHub Desktop.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 64037895b085..d598d65db32c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,7 +33,6 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
-#include "pgtable.h"
/*
* Locally defined symbols should be marked hidden:
@@ -305,55 +304,15 @@ ENTRY(startup_64)
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
- /*
- * paging_prepare() and cleanup_trampoline() below can have GOT
- * references. Adjust the table with address we are running at.
- *
- * Zero RAX for adjust_got: the GOT was not adjusted before;
- * there's no adjustment to undo.
- */
- xorq %rax, %rax
-
- /*
- * Calculate the address the binary is loaded at and use it as
- * a GOT adjustment.
- */
- call 1f
-1: popq %rdi
- subq $1b, %rdi
-
- call adjust_got
-
/*
* At this point we are in long mode with 4-level paging enabled,
- * but we might want to enable 5-level paging or vice versa.
- *
- * The problem is that we cannot do it directly. Setting or clearing
- * CR4.LA57 in long mode would trigger #GP. So we need to switch off
- * long mode and paging first.
+ * but we want to enable 5-level paging.
*
- * We also need a trampoline in lower memory to switch over from
- * 4- to 5-level paging for cases when the bootloader puts the kernel
- * above 4G, but didn't enable 5-level paging for us.
- *
- * The same trampoline can be used to switch from 5- to 4-level paging
- * mode, like when starting 4-level paging kernel via kexec() when
- * original kernel worked in 5-level paging mode.
- *
- * For the trampoline, we need the top page table to reside in lower
- * memory as we don't have a way to load 64-bit values into CR3 in
- * 32-bit mode.
- *
- * We go though the trampoline even if we don't have to: if we're
- * already in a desired paging mode. This way the trampoline code gets
- * tested on every boot.
+ * The problem is that we cannot do it directly. Setting LA57 in
+ * long mode would trigger #GP. So we need to switch off long mode
+ * first.
*/
- /* Make sure we have GDT with 32-bit code segment */
- leaq gdt(%rip), %rax
- movq %rax, gdt64+2(%rip)
- lgdt gdt64(%rip)
-
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
@@ -365,61 +324,41 @@ ENTRY(startup_64)
* this function call.
*/
pushq %rsi
- movq %rsi, %rdi /* real mode address */
call paging_prepare
popq %rsi
/* Save the trampoline address in RCX */
movq %rax, %rcx
+ /* Skip the mode switch if paging_prepare() did not request 5-level paging (RDX == 0) */
+ cmpq $0, %rdx
+ jz lvl5
+
+ /* Clear additional page table */
+ leaq lvl5_pgtable(%rbx), %rdi
+ xorq %rax, %rax
+ movq $(PAGE_SIZE/8), %rcx
+ rep stosq
+
/*
- * Load the address of trampoline_return() into RDI.
- * It will be used by the trampoline to return to the main code.
+ * Set up current CR3 as the first and only entry in a new top-level
+ * page table.
*/
- leaq trampoline_return(%rip), %rdi
+ movq %cr3, %rdi
+ leaq 0x7 (%rdi), %rax
+ movq %rax, lvl5_pgtable(%rbx)
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
+ leaq compatible_mode(%rip), %rax
pushq %rax
lretq
-trampoline_return:
- /* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq boot_stack_end(%rbx), %rsp
-
- /*
- * cleanup_trampoline() would restore trampoline memory.
- *
- * RDI is address of the page table to use instead of page table
- * in trampoline memory (if required).
- *
- * RSI holds real mode data and needs to be preserved across
- * this function call.
- */
- pushq %rsi
- leaq top_pgtable(%rbx), %rdi
- call cleanup_trampoline
- popq %rsi
+lvl5:
/* Zero EFLAGS */
pushq $0
popfq
- /*
- * Previously we've adjusted the GOT with address the binary was
- * loaded at. Now we need to re-adjust for relocation address.
- *
- * Calculate the address the binary is loaded at, so that we can
- * undo the previous GOT adjustment.
- */
- call 1f
-1: popq %rax
- subq $1b, %rax
-
- /* The new adjustment is the relocation address */
- movq %rbx, %rdi
- call adjust_got
-
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
@@ -520,6 +459,19 @@ relocated:
shrq $3, %rcx
rep stosq
+/*
+ * Adjust our own GOT
+ */
+ leaq _got(%rip), %rdx
+ leaq _egot(%rip), %rcx
+1:
+ cmpq %rcx, %rdx
+ jae 2f
+ addq %rbx, (%rdx)
+ addq $8, %rdx
+ jmp 1b
+2:
+
/*
* Do the extraction, and jump to the new kernel..
*/
@@ -538,104 +490,45 @@ relocated:
*/
jmp *%rax
-/*
- * Adjust the global offset table
- *
- * RAX is the previous adjustment of the table to undo (use 0 if it's the
- * first time we touch GOT).
- * RDI is the new adjustment to apply.
- */
-adjust_got:
- /* Walk through the GOT adding the address to the entries */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- subq %rax, (%rdx) /* Undo previous adjustment */
- addq %rdi, (%rdx) /* Apply the new adjustment */
- addq $8, %rdx
- jmp 1b
-2:
- ret
-
.code32
-/*
- * This is the 32-bit trampoline that will be copied over to low memory.
- *
- * RDI contains the return address (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX on return means we need to enable 5-level paging.
- */
-ENTRY(trampoline_32bit_src)
- /* Set up data and stack segments */
+compatible_mode:
+ /* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss
- /* Set up new stack */
- leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
-
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Check what paging mode we want to be in after the trampoline */
- cmpl $0, %edx
- jz 1f
+ /* Point CR3 to 5-level paging */
+ leal lvl5_pgtable(%ebx), %eax
+ movl %eax, %cr3
- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jnz 3f
- jmp 2f
-1:
- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+ /* Enable PAE and LA57 mode */
movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jz 3f
-2:
- /* Point CR3 to the trampoline's new top level page table */
- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
- movl %eax, %cr3
-3:
- /* Enable PAE and LA57 (if required) paging modes */
- movl $X86_CR4_PAE, %eax
- cmpl $0, %edx
- jz 1f
- orl $X86_CR4_LA57, %eax
-1:
+ orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
movl %eax, %cr4
- /* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+ /* Calculate the address we are running at */
+ call 1f
+1: popl %edi
+ subl $1b, %edi
- /* Prepare the stack for far return to Long Mode */
+ /* Prepare stack for far return to Long Mode */
pushl $__KERNEL_CS
- pushl %eax
+ leal lvl5(%edi), %eax
+ push %eax
- /* Enable paging again */
+ /* Re-enable paging */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
lret
- .code64
-paging_enabled:
- /* Return from the trampoline */
- jmp *%rdi
-
- /*
- * The trampoline code has a size limit.
- * Make sure we fail to compile if the trampoline code grows
- * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
- */
- .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
-
- .code32
no_longmode:
- /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+ /* This isn't an x86-64 CPU, so hang: we cannot continue */
1:
hlt
jmp 1b
@@ -643,11 +536,6 @@ no_longmode:
#include "../../kernel/verify_cpu.S"
.data
-gdt64:
- .word gdt_end - gdt
- .long 0
- .word 0
- .quad 0
gdt:
.word gdt_end - gdt
.long gdt
@@ -696,10 +584,5 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
-
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- */
-top_pgtable:
+lvl5_pgtable:
.fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae58..b50c42455e25 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,7 +14,6 @@
#include "misc.h"
#include "error.h"
-#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
@@ -300,10 +299,6 @@ static void parse_elf(void *output)
switch (phdr->p_type) {
case PT_LOAD:
-#ifdef CONFIG_X86_64
- if ((phdr->p_align % 0x200000) != 0)
- error("Alignment of LOAD segment isn't multiple of 2MB");
-#endif
#ifdef CONFIG_RELOCATABLE
dest = output;
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
@@ -377,11 +372,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
-#ifdef CONFIG_X86_64
- /* Report address of 32-bit trampoline */
- debug_putaddr(trampoline_32bit);
-#endif
-
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 9e2157371491..45c76eff2718 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,7 +1,4 @@
-#include <asm/e820/types.h>
#include <asm/processor.h>
-#include "pgtable.h"
-#include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
@@ -12,95 +9,20 @@
*/
unsigned long __force_order;
-#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
-#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
-
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
};
-/* Buffer to preserve trampoline memory */
-static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
-
-/*
- * Trampoline address will be printed by extract_kernel() for debugging
- * purposes.
- *
- * Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
- */
-unsigned long *trampoline_32bit __section(.data);
-
-extern struct boot_params *boot_params;
-int cmdline_find_option_bool(const char *option);
-
-static unsigned long find_trampoline_placement(void)
-{
- unsigned long bios_start, ebda_start;
- unsigned long trampoline_start;
- struct boot_e820_entry *entry;
- int i;
-
- /*
- * Find a suitable spot for the trampoline.
- * This code is based on reserve_bios_regions().
- */
-
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
-
- if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
- bios_start = BIOS_START_MAX;
-
- if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
- bios_start = ebda_start;
-
- bios_start = round_down(bios_start, PAGE_SIZE);
-
- /* Find the first usable memory region under bios_start. */
- for (i = boot_params->e820_entries - 1; i >= 0; i--) {
- entry = &boot_params->e820_table[i];
-
- /* Skip all entries above bios_start. */
- if (bios_start <= entry->addr)
- continue;
-
- /* Skip non-RAM entries. */
- if (entry->type != E820_TYPE_RAM)
- continue;
-
- /* Adjust bios_start to the end of the entry if needed. */
- if (bios_start > entry->addr + entry->size)
- bios_start = entry->addr + entry->size;
-
- /* Keep bios_start page-aligned. */
- bios_start = round_down(bios_start, PAGE_SIZE);
-
- /* Skip the entry if it's too small. */
- if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
- continue;
-
- break;
- }
-
- /* Place the trampoline just below the end of low memory */
- return bios_start - TRAMPOLINE_32BIT_SIZE;
-}
-
-struct paging_config paging_prepare(void *rmode)
+struct paging_config paging_prepare(void)
{
struct paging_config paging_config = {};
- /* Initialize boot_params. Required for cmdline_find_option_bool(). */
- boot_params = rmode;
-
/*
* Check if LA57 is desired and supported.
*
- * There are several parts to the check:
+ * There are two parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
- * - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
@@ -108,84 +30,10 @@ struct paging_config paging_prepare(void *rmode)
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
- !cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
}
- paging_config.trampoline_start = find_trampoline_placement();
-
- trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
-
- /* Preserve trampoline memory */
- memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
-
- /* Clear trampoline memory first */
- memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
-
- /* Copy trampoline code in place */
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
- &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
-
- /*
- * The code below prepares page table in trampoline memory.
- *
- * The new page table will be used by trampoline code for switching
- * from 4- to 5-level paging or vice versa.
- *
- * If switching is not required, the page table is unused: trampoline
- * code wouldn't touch CR3.
- */
-
- /*
- * We are not going to use the page table in trampoline memory if we
- * are already in the desired paging mode.
- */
- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
- goto out;
-
- if (paging_config.l5_required) {
- /*
- * For 4- to 5-level paging transition, set up current CR3 as
- * the first and the only entry in a new top-level page table.
- */
- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
- } else {
- unsigned long src;
-
- /*
- * For 5- to 4-level paging transition, copy page table pointed
- * by first entry in the current top-level page table as our
- * new top-level page table.
- *
- * We cannot just point to the page table from trampoline as it
- * may be above 4G.
- */
- src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
- (void *)src, PAGE_SIZE);
- }
-
-out:
return paging_config;
}
-
-void cleanup_trampoline(void *pgtable)
-{
- void *trampoline_pgtable;
-
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
-
- /*
- * Move the top level page table out of trampoline memory,
- * if it's there.
- */
- if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)pgtable);
- }
-
- /* Restore trampoline memory */
- memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment