Created
August 25, 2018 02:24
-
-
Save classilla/6ce544a34450536ae2f11d6ed69989ce to your computer and use it in GitHub Desktop.
Patch to apply to KVM-PR PPC sources for improved Mac OS X performance on machines with emulated dcbz.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c | |
index ca160395..8f4ba2bc 100644 | |
--- a/arch/powerpc/kvm/book3s_pr.c | |
+++ b/arch/powerpc/kvm/book3s_pr.c | |
@@ -23,16 +23,17 @@ | |
#include <linux/export.h> | |
#include <linux/err.h> | |
#include <linux/slab.h> | |
#include <asm/reg.h> | |
#include <asm/cputable.h> | |
#include <asm/cacheflush.h> | |
#include <asm/tlbflush.h> | |
+#include <asm/ppc-opcode.h> | |
#include <linux/uaccess.h> | |
#include <asm/io.h> | |
#include <asm/kvm_ppc.h> | |
#include <asm/kvm_book3s.h> | |
#include <asm/mmu_context.h> | |
#include <asm/switch_to.h> | |
#include <asm/firmware.h> | |
#include <asm/setup.h> | |
@@ -463,16 +464,60 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) | |
case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ | |
case 0x00087200: /* broadway */ | |
vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; | |
/* Enable HID2.PSE - in case we need it later */ | |
mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); | |
} | |
} | |
+static void install_modified_bzero(u32 *page, u32 offset) { /* Writes 40 big-endian PowerPC instruction words into page[] starting at word index offset; decoded below, they zero-fill r4 bytes starting at the address in r3 without dcbz. No bounds check is done here. */ | |
+// This routine does not use CTR or CTR-based loops; they seem to hang. | |
+page[offset++] = cpu_to_be32(0x2b840010); /* cmplwi cr7,r4,16: compare byte count with 16 */ | |
+page[offset++] = cpu_to_be32(0x38000000); /* li r0,0: r0 is the zero value that gets stored */ | |
+page[offset++] = cpu_to_be32(0x7ca300d0); /* neg r5,r3 */ | |
+page[offset++] = cpu_to_be32(0x7c691b78); /* mr r9,r3: r9 is the running store pointer */ | |
+page[offset++] = cpu_to_be32(0x419c0058); /* blt cr7,+0x58: count < 16, jump to the tail code at the mtcrf */ | |
+page[offset++] = cpu_to_be32(0x70a5000f); /* andi. r5,r5,15: bytes up to the next 16-byte boundary */ | |
+page[offset++] = cpu_to_be32(0x7c852050); /* subf r4,r5,r4: count -= alignment bytes */ | |
+page[offset++] = cpu_to_be32(0x5488e13e); /* srwi r8,r4,4: r8 = number of whole 16-byte chunks */ | |
+page[offset++] = cpu_to_be32(0x2c880000); /* cmpwi cr1,r8,0 */ | |
+page[offset++] = cpu_to_be32(0x4182001c); /* beq cr0,+0x1c: already aligned, skip to the second andi. */ | |
+page[offset++] = cpu_to_be32(0x90090000); /* stw r0,0(r9): zero 16 bytes at the unaligned start */ | |
+page[offset++] = cpu_to_be32(0x90090004); /* stw r0,4(r9) */ | |
+page[offset++] = cpu_to_be32(0x90090008); /* stw r0,8(r9) */ | |
+page[offset++] = cpu_to_be32(0x9009000c); /* stw r0,12(r9) */ | |
+page[offset++] = cpu_to_be32(0x7d292a14); /* add r9,r9,r5: advance to the 16-byte boundary */ | |
+page[offset++] = cpu_to_be32(0x4186002c); /* beq cr1,+0x2c: no whole chunks, jump to the tail code */ | |
+page[offset++] = cpu_to_be32(0x7085000f); /* andi. r5,r4,15: cr0.eq is set when no tail bytes remain */ | |
+page[offset++] = cpu_to_be32(0x90090000); /* stw r0,0(r9): loop body, zero one 16-byte chunk */ | |
+page[offset++] = cpu_to_be32(0x90090004); /* stw r0,4(r9) */ | |
+page[offset++] = cpu_to_be32(0x90090008); /* stw r0,8(r9) */ | |
+page[offset++] = cpu_to_be32(0x9009000c); /* stw r0,12(r9) */ | |
+page[offset++] = cpu_to_be32(0x3908ffff); /* addi r8,r8,-1: one chunk done */ | |
+page[offset++] = cpu_to_be32(0x2c880000); /* cmpwi cr1,r8,0 */ | |
+page[offset++] = cpu_to_be32(0x39290010); /* addi r9,r9,16 */ | |
+page[offset++] = cpu_to_be32(0x4185ffe4); /* bgt cr1,-0x1c: back to the first loop stw while chunks remain */ | |
+page[offset++] = cpu_to_be32(0x4d820020); /* beqlr cr0: return when there are no tail bytes */ | |
+page[offset++] = cpu_to_be32(0x7c901120); /* mtcrf 0x01,r4 (mtocrf encoding): low 4 bits of the count into cr7 */ | |
+page[offset++] = cpu_to_be32(0x409c0010); /* bge cr7,+0x10: count bit 8 clear, skip the 8-byte store */ | |
+page[offset++] = cpu_to_be32(0x90090000); /* stw r0,0(r9) */ | |
+page[offset++] = cpu_to_be32(0x90090004); /* stw r0,4(r9) */ | |
+page[offset++] = cpu_to_be32(0x39290008); /* addi r9,r9,8 */ | |
+page[offset++] = cpu_to_be32(0x409d000c); /* ble cr7,+0xc: count bit 4 clear, skip the 4-byte store */ | |
+page[offset++] = cpu_to_be32(0x90090000); /* stw r0,0(r9) */ | |
+page[offset++] = cpu_to_be32(0x39290004); /* addi r9,r9,4 */ | |
+page[offset++] = cpu_to_be32(0x409e000c); /* bne cr7,+0xc: count bit 2 clear, skip the halfword store */ | |
+page[offset++] = cpu_to_be32(0xb0090000); /* sth r0,0(r9) */ | |
+page[offset++] = cpu_to_be32(0x39290002); /* addi r9,r9,2 */ | |
+page[offset++] = cpu_to_be32(0x4c9f0020); /* bnslr cr7: count bit 1 clear, return */ | |
+page[offset++] = cpu_to_be32(0x98090000); /* stb r0,0(r9): final odd byte */ | |
+page[offset++] = cpu_to_be32(0x4e800020); /* blr */ | |
+} | |
+ | |
/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To | |
* make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to | |
* emulate 32 bytes dcbz length. | |
* | |
* The Book3s_64 inventors also realized this case and implemented a special bit | |
* in the HID5 register, which is a hypervisor ressource. Thus we can't use it. | |
* | |
* My approach here is to patch the dcbz instruction on executing pages. | |
@@ -490,20 +535,51 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) | |
hpage_offset = pte->raddr & ~PAGE_MASK; | |
hpage_offset &= ~0xFFFULL; | |
hpage_offset /= 4; | |
get_page(hpage); | |
page = kmap_atomic(hpage); | |
+ if ((pte->eaddr & 0xffff0000) == 0xffff0000) { | |
+ /* commpage OS X */ | |
+#if(0) | |
+for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) | |
+ if (be32_to_cpu(page[i]) == 0x2b840020) | |
+ pr_info("kvmac: instruction found at offset %i %08x\n", i, pte->eaddr); | |
+#endif | |
+ | |
+ // attempt to patch bzero32 | |
+ if (pte->eaddr == 0xffff8600) { | |
+ u32 offset = 0; | |
+ | |
+ // cmplwi cr7,r4,32 | |
+ // 10.3.9 | |
+ if (be32_to_cpu(page[384]) == 0x2b840020) | |
+ offset = 384; | |
+ // 10.4.11 | |
+ if (be32_to_cpu(page[2432]) == 0x2b840020) | |
+ offset = 2432; | |
+ if (offset) { | |
+pr_info("kvmac: patching bzero_32, offset %i, 0xffff8600\n", offset); | |
+ install_modified_bzero(page, offset); | |
+ } | |
+ } | |
+ } | |
+ | |
/* patch dcbz into reserved instruction, so we trap */ | |
for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) | |
+ { | |
if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ) | |
page[i] &= cpu_to_be32(0xfffffff7); | |
+ else /* patch dcba to nop, so we don't have to */ | |
+ if ((be32_to_cpu(page[i]) & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) | |
+ page[i] = cpu_to_be32(0x60000000); | |
+ } | |
kunmap_atomic(page); | |
put_page(hpage); | |
} | |
static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) | |
{ | |
ulong mp_pa = vcpu->arch.magic_page_pa; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment