Created December 22, 2018 00:03
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 64037895b085..d598d65db32c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,7 +33,6 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
-#include "pgtable.h"
/*
* Locally defined symbols should be marked hidden:
@@ -305,55 +304,15 @@ ENTRY(startup_64)
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
- /*
- * paging_prepare() and cleanup_trampoline() below can have GOT
- * references. Adjust the table with address we are running at.
- *
- * Zero RAX for adjust_got: the GOT was not adjusted before;
- * there's no adjustment to undo.
- */
- xorq %rax, %rax
-
- /*
- * Calculate the address the binary is loaded at and use it as
- * a GOT adjustment.
- */
- call 1f
-1: popq %rdi
- subq $1b, %rdi
-
- call adjust_got
-
/*
* At this point we are in long mode with 4-level paging enabled,
- * but we might want to enable 5-level paging or vice versa.
- *
- * The problem is that we cannot do it directly. Setting or clearing
- * CR4.LA57 in long mode would trigger #GP. So we need to switch off
- * long mode and paging first.
+ * but we want to enable 5-level paging.
*
- * We also need a trampoline in lower memory to switch over from
- * 4- to 5-level paging for cases when the bootloader puts the kernel
- * above 4G, but didn't enable 5-level paging for us.
- *
- * The same trampoline can be used to switch from 5- to 4-level paging
- * mode, like when starting 4-level paging kernel via kexec() when
- * original kernel worked in 5-level paging mode.
- *
- * For the trampoline, we need the top page table to reside in lower
- * memory as we don't have a way to load 64-bit values into CR3 in
- * 32-bit mode.
- *
- * We go though the trampoline even if we don't have to: if we're
- * already in a desired paging mode. This way the trampoline code gets
- * tested on every boot.
+ * The problem is that we cannot do it directly. Setting LA57 in
+ * long mode would trigger #GP. So we need to switch off long mode
+ * first.
*/
- /* Make sure we have GDT with 32-bit code segment */
- leaq gdt(%rip), %rax
- movq %rax, gdt64+2(%rip)
- lgdt gdt64(%rip)
-
/*
* paging_prepare() sets up the trampoline and checks if we need to
* enable 5-level paging.
@@ -365,61 +324,41 @@ ENTRY(startup_64)
* this function call.
*/
pushq %rsi
- movq %rsi, %rdi /* real mode address */
call paging_prepare
popq %rsi
/* Save the trampoline address in RCX */
movq %rax, %rcx
+ /* Check if we need to enable 5-level paging */
+ cmpq $0, %rdx
+ jz lvl5
+
+ /* Clear additional page table */
+ leaq lvl5_pgtable(%rbx), %rdi
+ xorq %rax, %rax
+ movq $(PAGE_SIZE/8), %rcx
+ rep stosq
+
/*
- * Load the address of trampoline_return() into RDI.
- * It will be used by the trampoline to return to the main code.
+ * Setup current CR3 as the first and only entry in a new top level
+ * page table.
*/
- leaq trampoline_return(%rip), %rdi
+ movq %cr3, %rdi
+ leaq 0x7 (%rdi), %rax
+ movq %rax, lvl5_pgtable(%rbx)
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
- leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
+ leaq compatible_mode(%rip), %rax
pushq %rax
lretq
-trampoline_return:
- /* Restore the stack, the 32-bit trampoline uses its own stack */
- leaq boot_stack_end(%rbx), %rsp
-
- /*
- * cleanup_trampoline() would restore trampoline memory.
- *
- * RDI is address of the page table to use instead of page table
- * in trampoline memory (if required).
- *
- * RSI holds real mode data and needs to be preserved across
- * this function call.
- */
- pushq %rsi
- leaq top_pgtable(%rbx), %rdi
- call cleanup_trampoline
- popq %rsi
+lvl5:
/* Zero EFLAGS */
pushq $0
popfq
- /*
- * Previously we've adjusted the GOT with address the binary was
- * loaded at. Now we need to re-adjust for relocation address.
- *
- * Calculate the address the binary is loaded at, so that we can
- * undo the previous GOT adjustment.
- */
- call 1f
-1: popq %rax
- subq $1b, %rax
-
- /* The new adjustment is the relocation address */
- movq %rbx, %rdi
- call adjust_got
-
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
@@ -520,6 +459,19 @@ relocated:
shrq $3, %rcx
rep stosq
+/*
+ * Adjust our own GOT
+ */
+ leaq _got(%rip), %rdx
+ leaq _egot(%rip), %rcx
+1:
+ cmpq %rcx, %rdx
+ jae 2f
+ addq %rbx, (%rdx)
+ addq $8, %rdx
+ jmp 1b
+2:
+
/*
* Do the extraction, and jump to the new kernel..
*/
@@ -538,104 +490,45 @@ relocated:
*/
jmp *%rax
-/*
- * Adjust the global offset table
- *
- * RAX is the previous adjustment of the table to undo (use 0 if it's the
- * first time we touch GOT).
- * RDI is the new adjustment to apply.
- */
-adjust_got:
- /* Walk through the GOT adding the address to the entries */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- subq %rax, (%rdx) /* Undo previous adjustment */
- addq %rdi, (%rdx) /* Apply the new adjustment */
- addq $8, %rdx
- jmp 1b
-2:
- ret
-
.code32
-/*
- * This is the 32-bit trampoline that will be copied over to low memory.
- *
- * RDI contains the return address (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX on return means we need to enable 5-level paging.
- */
-ENTRY(trampoline_32bit_src)
- /* Set up data and stack segments */
+compatible_mode:
+ /* Setup data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss
- /* Set up new stack */
- leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
-
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Check what paging mode we want to be in after the trampoline */
- cmpl $0, %edx
- jz 1f
+ /* Point CR3 to 5-level paging */
+ leal lvl5_pgtable(%ebx), %eax
+ movl %eax, %cr3
- /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
- movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jnz 3f
- jmp 2f
-1:
- /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
+ /* Enable PAE and LA57 mode */
movl %cr4, %eax
- testl $X86_CR4_LA57, %eax
- jz 3f
-2:
- /* Point CR3 to the trampoline's new top level page table */
- leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
- movl %eax, %cr3
-3:
- /* Enable PAE and LA57 (if required) paging modes */
- movl $X86_CR4_PAE, %eax
- cmpl $0, %edx
- jz 1f
- orl $X86_CR4_LA57, %eax
-1:
+ orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
movl %eax, %cr4
- /* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+ /* Calculate address we are running at */
+ call 1f
+1: popl %edi
+ subl $1b, %edi
- /* Prepare the stack for far return to Long Mode */
+ /* Prepare stack for far return to Long Mode */
pushl $__KERNEL_CS
- pushl %eax
+ leal lvl5(%edi), %eax
+ push %eax
- /* Enable paging again */
+ /* Enable paging back */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
lret
- .code64
-paging_enabled:
- /* Return from the trampoline */
- jmp *%rdi
-
- /*
- * The trampoline code has a size limit.
- * Make sure we fail to compile if the trampoline code grows
- * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
- */
- .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
-
- .code32
no_longmode:
- /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
+ /* This isn't an x86-64 CPU so hang */
1:
hlt
jmp 1b
@@ -643,11 +536,6 @@ no_longmode:
#include "../../kernel/verify_cpu.S"
.data
-gdt64:
- .word gdt_end - gdt
- .long 0
- .word 0
- .quad 0
gdt:
.word gdt_end - gdt
.long gdt
@@ -696,10 +584,5 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
-
-/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- */
-top_pgtable:
+lvl5_pgtable:
.fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae58..b50c42455e25 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,7 +14,6 @@
#include "misc.h"
#include "error.h"
-#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
@@ -300,10 +299,6 @@ static void parse_elf(void *output)
switch (phdr->p_type) {
case PT_LOAD:
-#ifdef CONFIG_X86_64
- if ((phdr->p_align % 0x200000) != 0)
- error("Alignment of LOAD segment isn't multiple of 2MB");
-#endif
#ifdef CONFIG_RELOCATABLE
dest = output;
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
@@ -377,11 +372,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
-#ifdef CONFIG_X86_64
- /* Report address of 32-bit trampoline */
- debug_putaddr(trampoline_32bit);
-#endif
-
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 9e2157371491..45c76eff2718 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,7 +1,4 @@
-#include <asm/e820/types.h>
#include <asm/processor.h>
-#include "pgtable.h"
-#include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
@@ -12,95 +9,20 @@
*/
unsigned long __force_order;
-#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
-#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
-
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
};
-/* Buffer to preserve trampoline memory */
-static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
-
-/*
- * Trampoline address will be printed by extract_kernel() for debugging
- * purposes.
- *
- * Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
- */
-unsigned long *trampoline_32bit __section(.data);
-
-extern struct boot_params *boot_params;
-int cmdline_find_option_bool(const char *option);
-
-static unsigned long find_trampoline_placement(void)
-{
- unsigned long bios_start, ebda_start;
- unsigned long trampoline_start;
- struct boot_e820_entry *entry;
- int i;
-
- /*
- * Find a suitable spot for the trampoline.
- * This code is based on reserve_bios_regions().
- */
-
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
-
- if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
- bios_start = BIOS_START_MAX;
-
- if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
- bios_start = ebda_start;
-
- bios_start = round_down(bios_start, PAGE_SIZE);
-
- /* Find the first usable memory region under bios_start. */
- for (i = boot_params->e820_entries - 1; i >= 0; i--) {
- entry = &boot_params->e820_table[i];
-
- /* Skip all entries above bios_start. */
- if (bios_start <= entry->addr)
- continue;
-
- /* Skip non-RAM entries. */
- if (entry->type != E820_TYPE_RAM)
- continue;
-
- /* Adjust bios_start to the end of the entry if needed. */
- if (bios_start > entry->addr + entry->size)
- bios_start = entry->addr + entry->size;
-
- /* Keep bios_start page-aligned. */
- bios_start = round_down(bios_start, PAGE_SIZE);
-
- /* Skip the entry if it's too small. */
- if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
- continue;
-
- break;
- }
-
- /* Place the trampoline just below the end of low memory */
- return bios_start - TRAMPOLINE_32BIT_SIZE;
-}
-
-struct paging_config paging_prepare(void *rmode)
+struct paging_config paging_prepare(void)
{
struct paging_config paging_config = {};
- /* Initialize boot_params. Required for cmdline_find_option_bool(). */
- boot_params = rmode;
-
/*
* Check if LA57 is desired and supported.
*
- * There are several parts to the check:
+ * There are two parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
- * - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
@@ -108,84 +30,10 @@ struct paging_config paging_prepare(void *rmode)
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
- !cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
}
- paging_config.trampoline_start = find_trampoline_placement();
-
- trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
-
- /* Preserve trampoline memory */
- memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
-
- /* Clear trampoline memory first */
- memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
-
- /* Copy trampoline code in place */
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
- &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
-
- /*
- * The code below prepares page table in trampoline memory.
- *
- * The new page table will be used by trampoline code for switching
- * from 4- to 5-level paging or vice versa.
- *
- * If switching is not required, the page table is unused: trampoline
- * code wouldn't touch CR3.
- */
-
- /*
- * We are not going to use the page table in trampoline memory if we
- * are already in the desired paging mode.
- */
- if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
- goto out;
-
- if (paging_config.l5_required) {
- /*
- * For 4- to 5-level paging transition, set up current CR3 as
- * the first and the only entry in a new top-level page table.
- */
- trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
- } else {
- unsigned long src;
-
- /*
- * For 5- to 4-level paging transition, copy page table pointed
- * by first entry in the current top-level page table as our
- * new top-level page table.
- *
- * We cannot just point to the page table from trampoline as it
- * may be above 4G.
- */
- src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
- memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
- (void *)src, PAGE_SIZE);
- }
-
-out:
return paging_config;
}
-
-void cleanup_trampoline(void *pgtable)
-{
- void *trampoline_pgtable;
-
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
-
- /*
- * Move the top level page table out of trampoline memory,
- * if it's there.
- */
- if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)pgtable);
- }
-
- /* Restore trampoline memory */
- memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-}
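
For reference, the hardware part of the check kept in the simplified paging_prepare() above, native_cpuid_eax(0) >= 7 plus the X86_FEATURE_LA57 bit in native_cpuid_ecx(7), amounts to testing CPUID leaf 7, subleaf 0, ECX bit 16. The following is a minimal stand-alone sketch of that same test, not part of the patch: it assumes an ordinary userspace build on x86-64 with a GCC or Clang toolchain that ships <cpuid.h>, rather than the kernel's native_cpuid_*() helpers.

/*
 * Illustrative userspace sketch only: reports whether the CPU advertises
 * LA57 (5-level paging), using the compiler-provided <cpuid.h> helpers.
 */
#include <stdio.h>
#include <cpuid.h>

static int cpu_supports_la57(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid_count() returns 0 if CPUID leaf 7 is not available. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;

	/* X86_FEATURE_LA57 corresponds to ECX bit 16 of leaf 7, subleaf 0. */
	return !!(ecx & (1u << 16));
}

int main(void)
{
	printf("LA57 (5-level paging) %s reported by this CPU\n",
	       cpu_supports_la57() ? "is" : "is not");
	return 0;
}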