Instantly share code, notes, and snippets.

Embed
What would you like to do?
Patch to apply to KVM-PR PPC sources for improved Mac OS X performance on machines with emulated dcbz.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ca160395..8f4ba2bc 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -23,16 +23,17 @@
#include <linux/export.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/ppc-opcode.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/firmware.h>
#include <asm/setup.h>
@@ -463,16 +464,60 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */
case 0x00087200: /* broadway */
vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
/* Enable HID2.PSE - in case we need it later */
mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
}
}
+static void install_modified_bzero(u32 *page, u32 offset) { /* stores 40 big-endian instruction words at page[offset] .. page[offset + 39]; no bounds check on offset */
+/* The installed routine does not use CTR or CTR-based loops; they seem to hang. Mnemonics below are hand-decoded from the opcodes (confirm with a disassembler); "word N" is the index relative to offset. Presumably r3 = destination and r4 = byte count. */
+page[offset++] = cpu_to_be32(0x2b840010); /* word 0:  cmplwi cr7,r4,16 */
+page[offset++] = cpu_to_be32(0x38000000); /* word 1:  li r0,0 (the value stored by every store below) */
+page[offset++] = cpu_to_be32(0x7ca300d0); /* word 2:  neg r5,r3 */
+page[offset++] = cpu_to_be32(0x7c691b78); /* word 3:  mr r9,r3 (r9 is the running store pointer) */
+page[offset++] = cpu_to_be32(0x419c0058); /* word 4:  blt cr7 -> word 26 (r4 < 16: tail stores only) */
+page[offset++] = cpu_to_be32(0x70a5000f); /* word 5:  andi. r5,r5,15 (bytes up to the next 16-byte boundary; sets cr0) */
+page[offset++] = cpu_to_be32(0x7c852050); /* word 6:  subf r4,r5,r4 (r4 -= r5) */
+page[offset++] = cpu_to_be32(0x5488e13e); /* word 7:  srwi r8,r4,4 (number of 16-byte chunks) */
+page[offset++] = cpu_to_be32(0x2c880000); /* word 8:  cmpwi cr1,r8,0 */
+page[offset++] = cpu_to_be32(0x4182001c); /* word 9:  beq cr0 -> word 16 (r5 == 0: already aligned) */
+page[offset++] = cpu_to_be32(0x90090000); /* word 10: stw r0,0(r9) */
+page[offset++] = cpu_to_be32(0x90090004); /* word 11: stw r0,4(r9) */
+page[offset++] = cpu_to_be32(0x90090008); /* word 12: stw r0,8(r9) */
+page[offset++] = cpu_to_be32(0x9009000c); /* word 13: stw r0,12(r9) */
+page[offset++] = cpu_to_be32(0x7d292a14); /* word 14: add r9,r9,r5 (advance to the aligned address) */
+page[offset++] = cpu_to_be32(0x4186002c); /* word 15: beq cr1 -> word 26 (no full chunk left) */
+page[offset++] = cpu_to_be32(0x7085000f); /* word 16: andi. r5,r4,15 (leftover bytes after the chunks; sets cr0) */
+page[offset++] = cpu_to_be32(0x90090000); /* word 17: stw r0,0(r9)   <- top of the 16-byte chunk loop */
+page[offset++] = cpu_to_be32(0x90090004); /* word 18: stw r0,4(r9) */
+page[offset++] = cpu_to_be32(0x90090008); /* word 19: stw r0,8(r9) */
+page[offset++] = cpu_to_be32(0x9009000c); /* word 20: stw r0,12(r9) */
+page[offset++] = cpu_to_be32(0x3908ffff); /* word 21: addi r8,r8,-1 */
+page[offset++] = cpu_to_be32(0x2c880000); /* word 22: cmpwi cr1,r8,0 */
+page[offset++] = cpu_to_be32(0x39290010); /* word 23: addi r9,r9,16 */
+page[offset++] = cpu_to_be32(0x4185ffe4); /* word 24: bgt cr1 -> word 17 (compare-and-branch loop instead of CTR) */
+page[offset++] = cpu_to_be32(0x4d820020); /* word 25: beqlr cr0 (return if no leftover bytes) */
+page[offset++] = cpu_to_be32(0x7c901120); /* word 26: mtocrf 0x01,r4 (cr7 <- low 4 bits of r4: lt=8, gt=4, eq=2, so=1) */
+page[offset++] = cpu_to_be32(0x409c0010); /* word 27: bge cr7 -> word 31 (bit 8 clear: skip the 8-byte store) */
+page[offset++] = cpu_to_be32(0x90090000); /* word 28: stw r0,0(r9) */
+page[offset++] = cpu_to_be32(0x90090004); /* word 29: stw r0,4(r9) */
+page[offset++] = cpu_to_be32(0x39290008); /* word 30: addi r9,r9,8 */
+page[offset++] = cpu_to_be32(0x409d000c); /* word 31: ble cr7 -> word 34 (bit 4 clear: skip the 4-byte store) */
+page[offset++] = cpu_to_be32(0x90090000); /* word 32: stw r0,0(r9) */
+page[offset++] = cpu_to_be32(0x39290004); /* word 33: addi r9,r9,4 */
+page[offset++] = cpu_to_be32(0x409e000c); /* word 34: bne cr7 -> word 37 (bit 2 clear: skip the 2-byte store) */
+page[offset++] = cpu_to_be32(0xb0090000); /* word 35: sth r0,0(r9) */
+page[offset++] = cpu_to_be32(0x39290002); /* word 36: addi r9,r9,2 */
+page[offset++] = cpu_to_be32(0x4c9f0020); /* word 37: bnslr cr7 (bit 1 clear: return) */
+page[offset++] = cpu_to_be32(0x98090000); /* word 38: stb r0,0(r9) */
+page[offset++] = cpu_to_be32(0x4e800020); /* word 39: blr */
+}
+
/* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
* make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
* emulate 32 bytes dcbz length.
*
* The Book3s_64 inventors also realized this case and implemented a special bit
* in the HID5 register, which is a hypervisor ressource. Thus we can't use it.
*
* My approach here is to patch the dcbz instruction on executing pages.
@@ -490,20 +535,51 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
hpage_offset = pte->raddr & ~PAGE_MASK;
hpage_offset &= ~0xFFFULL;
hpage_offset /= 4;
get_page(hpage);
page = kmap_atomic(hpage);
+ if ((pte->eaddr & 0xffff0000) == 0xffff0000) {
+ /* commpage OS X */
+#if(0)
+for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+ if (be32_to_cpu(page[i]) == 0x2b840020)
+ pr_info("kvmac: instruction found at offset %i %08x\n", i, pte->eaddr);
+#endif
+
+ // attempt to patch bzero32
+ if (pte->eaddr == 0xffff8600) {
+ u32 offset = 0;
+
+ // cmplwi cr7,r4,32
+ // 10.3.9
+ if (be32_to_cpu(page[384]) == 0x2b840020)
+ offset = 384;
+ // 10.4.11
+ if (be32_to_cpu(page[2432]) == 0x2b840020)
+ offset = 2432;
+ if (offset) {
+pr_info("kvmac: patching bzero_32, offset %i, 0xffff8600\n", offset);
+ install_modified_bzero(page, offset);
+ }
+ }
+ }
+
/* patch dcbz into reserved instruction, so we trap */
for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+ {
if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
page[i] &= cpu_to_be32(0xfffffff7);
+ else /* patch dcba to nop, so we don't have to */
+ if ((be32_to_cpu(page[i]) & PPC_INST_DCBA_MASK) == PPC_INST_DCBA)
+ page[i] = cpu_to_be32(0x60000000);
+ }
kunmap_atomic(page);
put_page(hpage);
}
static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment