-
-
Save proywm/246e69fe210fba057b721afd6d1f504f to your computer and use it in GitHub Desktop.
PEBS for guest handled by host
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ 63.099826] PANIC: double fault, error_code: 0x0 | |
[ 63.102042] Kernel panic - not syncing: Machine halted. | |
[ 63.129514] Kernel panic - page fault address fffffe0000059ff8 | |
[ 63.131090] CPU: 2 PID: 1405 Comm: stress Tainted: G OE 4.14.56 8 | |
[ 63.132806] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-4 | |
[ 63.136453] Call Trace: | |
[ 63.137225] <#DF> | |
[ 63.138310] dump_stack+0x63/0x8b | |
[ 63.139210] panic+0xfa/0x264 | |
[ 63.169131] df_debug+0x2d/0x30 | |
[ 63.170518] do_double_fault+0x9a/0x130 | |
[ 63.172109] double_fault+0x1e/0x30 | |
[ 63.173560] RIP: 0010:error_entry+0x1e/0x100 | |
[ 63.178867] RSP: 0000:fffffe000005a000 EFLAGS: 00010046 | |
[ 63.181015] RAX: 000000008dc00a27 RBX: 0000000000000001 RCX: ffffffff8dc00a27 | |
[ 63.187827] RDX: 0000000005da9000 RSI: ffffffff8dc0163f RDI: fffffe000005a078 | |
[ 63.198726] RBP: fffffe000005a079 R08: 0000000000000000 R09: 0000000000000000 | |
[ 63.201235] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 | |
[ 63.203756] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 | |
[ 63.206687] ? native_iret+0x7/0x7 | |
[ 63.208317] ? async_page_fault+0xf/0x50 | |
[ 63.213583] </#DF> | |
[ 63.219669] <ENTRY_TRAMPOLINE> | |
[ 63.220804] RIP: 0010:do_async_page_fault+0x0/0x90 | |
[ 63.222843] RSP: 0000:fffffe000005a120 EFLAGS: 00010012 | |
[ 63.231049] RAX: 000000008dc00a27 RBX: 0000000000000001 RCX: ffffffff8dc00a27 | |
[ 63.232686] RDX: 0000000005da9000 RSI: 0000000000000000 RDI: fffffe000005a128 | |
[ 63.234313] RBP: fffffe000005a129 R08: 0000000000000000 R09: 0000000000000000 | |
[ 63.243214] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 | |
[ 63.247130] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 | |
[ 63.256083] ? native_iret+0x7/0x7 | |
[ 63.257165] ? kvm_async_pf_task_wait+0x220/0x220 | |
[ 63.258411] ? async_page_fault+0x25/0x50 | |
[ 63.259458] ? native_iret+0x7/0x7 | |
[ 63.271327] RIP: 0010:async_page_fault+0x0/0x50 | |
[ 63.272543] RSP: 0000:fffffe000005a1d0 EFLAGS: 00010006 | |
[ 63.283608] RAX: 00007f254a950010 RBX: 00007f2544ba7010 RCX: 00007f2544ba7010 | |
[ 63.290927] RDX: 0000000005da9000 RSI: 0000000008001000 RDI: 0000000000000000 | |
[ 63.293163] RBP: 0000000008000000 R08: ffffffffffffffff R09: 0000000000000000 | |
[ 63.294882] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000001000 | |
[ 63.296558] R13: 00007f254cba6010 R14: 0000000000000002 R15: fffffffffffff000 | |
[ 63.298282] ? async_page_fault+0x25/0x50 | |
[ 63.299226] </ENTRY_TRAMPOLINE> | |
[ 63.301758] Kernel Offset: 0xc200000 from 0xffffffff81000000 (relocation ran) | |
[ 63.304179] ---[ end Kernel panic - not syncing: Machine halted. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- ../../linux/arch/x86/kvm/x86.c 2018-05-03 18:08:21.145053785 -0500 | |
+++ arch/x86/kvm/x86.c 2018-07-20 09:44:53.309413748 -0500 | |
@@ -2154,6 +2154,86 @@ | |
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | |
} | |
+#include "../events/perf_event.h" | |
+#define MAX_PINNED_PAGES 17 | |
+ | |
+/*
+ * Resolve a guest virtual address to the host page that backs it.
+ *
+ * The address is translated with walk_mmu->gva_to_gpa() asking for write
+ * access, and the resulting guest frame is looked up with gfn_to_pfn().
+ *
+ * @vcpu: vCPU whose page tables are walked
+ * @addr: guest virtual address to resolve
+ *
+ * Returns the backing struct page, or NULL (after a one-time log message)
+ * when the address cannot be translated or has no usable host frame.
+ *
+ * NOTE(review): gfn_to_pfn() is expected to take a reference on the page;
+ * this helper never drops it, so the caller owns that reference -- confirm.
+ */
+static struct page *get_guest_page(struct kvm_vcpu *vcpu,
+				   unsigned long addr)
+{
+	/*
+	 * kvm_pfn_t, not unsigned long: the error encodings tested by
+	 * is_error_noslot_pfn() must not be truncated on 32-bit hosts.
+	 */
+	kvm_pfn_t pfn;
+	struct x86_exception exception;
+	gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
+						    PFERR_WRITE_MASK,
+						    &exception);
+
+	if (gpa == UNMAPPED_GVA) {
+		printk_once("Cannot translate guest page %lx\n", addr);
+		return NULL;
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+	if (is_error_noslot_pfn(pfn)) {
+		printk_once("gfn_to_pfn failed for %llx\n", gpa);
+		return NULL;
+	}
+	return pfn_to_page(pfn);
+}
+ | |
+/*
+ * Look up the host page backing a guest virtual address and copy bytes
+ * out of it.
+ *
+ * @vcpu: vCPU whose address space @addr belongs to
+ * @addr: guest virtual address to read from
+ * @dst:  destination buffer, at least @len bytes
+ * @len:  number of bytes to copy; the caller must keep
+ *        (addr & ~PAGE_MASK) + len within one page
+ * @p:    out parameter receiving the page (NULL on failure); the page is
+ *        not released here, so the caller keeps it
+ *
+ * Returns 0 on success, -EIO if the page could not be resolved.
+ */
+static int pin_and_copy(struct kvm_vcpu *vcpu,
+			unsigned long addr, void *dst, int len,
+			struct page **p)
+{
+	unsigned long offset = addr & ~PAGE_MASK;
+	void *map;
+
+	*p = get_guest_page(vcpu, addr);
+	if (!*p)
+		return -EIO;
+	map = kmap(*p);
+	memcpy(dst, map + offset, len);
+	/* kunmap() takes the page itself, not the address kmap() returned. */
+	kunmap(*p);
+	return 0;
+}
+ | |
+/*
+ * Record the guest's DS area address and pin the pages backing it: the
+ * struct debug_store itself (one page, or two if it straddles a page
+ * boundary) followed by the PEBS buffer it describes, up to
+ * MAX_PINNED_PAGES pages in total.
+ *
+ * @vcpu:    vCPU whose guest wrote MSR_IA32_DS_AREA
+ * @ds_area: guest virtual address of the guest's struct debug_store
+ *
+ * On return pmu->num_pinned_pages is the number of valid leading entries
+ * in pmu->pinned_pages[].  Failures are logged and simply leave fewer
+ * pages pinned; nothing is reported to the caller.
+ */
+static void kvm_pmu_pebs_pin(struct kvm_vcpu *vcpu, u64 ds_area)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct debug_store ds;
+	unsigned long addr;
+	unsigned long offset = ds_area & ~PAGE_MASK;
+	unsigned len = sizeof(struct debug_store);
+	unsigned pg;
+
+	/*
+	 * The guest may write the MSR more than once.  Drop whatever an
+	 * earlier write pinned and restart the count, otherwise the count
+	 * grows without bound and the old page references are leaked.
+	 * Released as dirty because these pages are written on the guest's
+	 * behalf.
+	 */
+	for (pg = 0; pg < pmu->num_pinned_pages && pg < MAX_PINNED_PAGES; pg++)
+		kvm_release_page_dirty(pmu->pinned_pages[pg]);
+	pmu->num_pinned_pages = 0;
+
+	pmu->ds_area = ds_area;
+	/* First chunk: as much of the debug_store as fits in its first page. */
+	len = min_t(unsigned, PAGE_SIZE - offset, len);
+	if (pin_and_copy(vcpu, ds_area, &ds, len,
+			 &pmu->pinned_pages[0]) < 0) {
+		printk_once("Cannot pin ds area %llx\n", ds_area);
+		return;
+	}
+	pmu->num_pinned_pages++;
+	if (len < sizeof(struct debug_store)) {
+		/* debug_store crosses a page boundary: fetch the remainder. */
+		if (pin_and_copy(vcpu, ds_area + len, (void *)&ds + len,
+				 sizeof(struct debug_store) - len,
+				 &pmu->pinned_pages[1]) < 0)
+			return;
+		pmu->num_pinned_pages++;
+	}
+	pg = pmu->num_pinned_pages;
+	/*
+	 * Walk from the page containing the buffer base so that a buffer
+	 * with an unaligned base still has its final partial page pinned.
+	 */
+	for (addr = ds.pebs_buffer_base & PAGE_MASK;
+	     addr < ds.pebs_absolute_maximum && pg < MAX_PINNED_PAGES;
+	     addr += PAGE_SIZE, pg++) {
+		pmu->pinned_pages[pg] = get_guest_page(vcpu, addr);
+		printk("going to pin page at %lx\n", addr);
+		if (!pmu->pinned_pages[pg]) {
+			printk_once("Cannot pin PEBS buffer %lx (%llx-%llx)\n",
+				    addr,
+				    ds.pebs_buffer_base,
+				    ds.pebs_absolute_maximum);
+			break;
+		}
+	}
+	/* pg stops at the first slot that was not filled. */
+	pmu->num_pinned_pages = pg;
+}
+ | |
+ | |
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |
{ | |
bool pr = false; | |
@@ -2161,6 +2241,10 @@ | |
u64 data = msr_info->data; | |
switch (msr) { | |
+ case MSR_IA32_DS_AREA: | |
+ pr_info("KVM: guest asked to set MSR_IA32_DS_AREA\n"); | |
+ kvm_pmu_pebs_pin(vcpu, data); | |
+ break; | |
case MSR_AMD64_NB_CFG: | |
case MSR_IA32_UCODE_REV: | |
case MSR_IA32_UCODE_WRITE: | |
--- ../../linux/arch/x86/kvm/vmx.c 2018-05-03 18:08:21.141054902 -0500 | |
+++ arch/x86/kvm/vmx.c 2018-07-20 09:45:45.433479581 -0500 | |
@@ -1995,13 +1995,22 @@ | |
return; | |
} | |
break; | |
- case MSR_IA32_PEBS_ENABLE: | |
- /* PEBS needs a quiescent period after being disabled (to write | |
- * a record). Disabling PEBS through VMX MSR swapping doesn't | |
- * provide that period, so a CPU could write host's record into | |
- * guest's memory. | |
- */ | |
- wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | |
} | |
for (i = 0; i < m->nr; ++i) | |
@@ -9274,17 +9286,33 @@ | |
int i, nr_msrs; | |
struct perf_guest_switch_msr *msrs; | |
+ if (vmx->vcpu.arch.pmu.ds_area) | |
+ { | |
+ add_atomic_switch_msr(vmx, MSR_IA32_DS_AREA, | |
+ vmx->vcpu.arch.pmu.ds_area, | |
+ perf_get_ds_area()); | |
+ } | |
+ | |
msrs = perf_guest_get_msrs(&nr_msrs); | |
if (!msrs) | |
return; | |
for (i = 0; i < nr_msrs; i++) | |
+ { | |
+ if (!vmx->vcpu.arch.pmu.ds_area && msrs[i].msr==MSR_IA32_PEBS_ENABLE) | |
+ { | |
+ msrs[i].guest = 0; | |
+ } | |
if (msrs[i].host == msrs[i].guest) | |
clear_atomic_switch_msr(vmx, msrs[i].msr); | |
else | |
add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, | |
msrs[i].host); | |
+ } | |
} | |
static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) | |
--- ../../linux/arch/x86/include/asm/kvm_host.h 2018-05-03 18:08:21.089069431 -0500 | |
+++ arch/x86/include/asm/kvm_host.h 2018-07-19 16:19:37.535310345 -0500 | |
@@ -399,7 +399,7 @@ | |
struct perf_event *perf_event; | |
struct kvm_vcpu *vcpu; | |
}; | |
- | |
+#define MAX_PINNED_PAGES 17 | |
struct kvm_pmu { | |
unsigned nr_arch_gp_counters; | |
unsigned nr_arch_fixed_counters; | |
@@ -416,6 +416,9 @@ | |
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; | |
struct irq_work irq_work; | |
u64 reprogram_pmi; | |
+ u64 ds_area; | |
+ struct page *pinned_pages[MAX_PINNED_PAGES]; | |
+ unsigned num_pinned_pages; | |
}; | |
struct kvm_pmu_ops; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment