Skip to content

Instantly share code, notes, and snippets.

@proywm
Last active July 20, 2018 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save proywm/246e69fe210fba057b721afd6d1f504f to your computer and use it in GitHub Desktop.
Save proywm/246e69fe210fba057b721afd6d1f504f to your computer and use it in GitHub Desktop.
PEBS for guest handled by host
[ 63.099826] PANIC: double fault, error_code: 0x0
[ 63.102042] Kernel panic - not syncing: Machine halted.
[ 63.129514] Kernel panic - page fault address fffffe0000059ff8
[ 63.131090] CPU: 2 PID: 1405 Comm: stress Tainted: G OE 4.14.56 8
[ 63.132806] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-4
[ 63.136453] Call Trace:
[ 63.137225] <#DF>
[ 63.138310] dump_stack+0x63/0x8b
[ 63.139210] panic+0xfa/0x264
[ 63.169131] df_debug+0x2d/0x30
[ 63.170518] do_double_fault+0x9a/0x130
[ 63.172109] double_fault+0x1e/0x30
[ 63.173560] RIP: 0010:error_entry+0x1e/0x100
[ 63.178867] RSP: 0000:fffffe000005a000 EFLAGS: 00010046
[ 63.181015] RAX: 000000008dc00a27 RBX: 0000000000000001 RCX: ffffffff8dc00a27
[ 63.187827] RDX: 0000000005da9000 RSI: ffffffff8dc0163f RDI: fffffe000005a078
[ 63.198726] RBP: fffffe000005a079 R08: 0000000000000000 R09: 0000000000000000
[ 63.201235] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 63.203756] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 63.206687] ? native_iret+0x7/0x7
[ 63.208317] ? async_page_fault+0xf/0x50
[ 63.213583] </#DF>
[ 63.219669] <ENTRY_TRAMPOLINE>
[ 63.220804] RIP: 0010:do_async_page_fault+0x0/0x90
[ 63.222843] RSP: 0000:fffffe000005a120 EFLAGS: 00010012
[ 63.231049] RAX: 000000008dc00a27 RBX: 0000000000000001 RCX: ffffffff8dc00a27
[ 63.232686] RDX: 0000000005da9000 RSI: 0000000000000000 RDI: fffffe000005a128
[ 63.234313] RBP: fffffe000005a129 R08: 0000000000000000 R09: 0000000000000000
[ 63.243214] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 63.247130] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 63.256083] ? native_iret+0x7/0x7
[ 63.257165] ? kvm_async_pf_task_wait+0x220/0x220
[ 63.258411] ? async_page_fault+0x25/0x50
[ 63.259458] ? native_iret+0x7/0x7
[ 63.271327] RIP: 0010:async_page_fault+0x0/0x50
[ 63.272543] RSP: 0000:fffffe000005a1d0 EFLAGS: 00010006
[ 63.283608] RAX: 00007f254a950010 RBX: 00007f2544ba7010 RCX: 00007f2544ba7010
[ 63.290927] RDX: 0000000005da9000 RSI: 0000000008001000 RDI: 0000000000000000
[ 63.293163] RBP: 0000000008000000 R08: ffffffffffffffff R09: 0000000000000000
[ 63.294882] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000001000
[ 63.296558] R13: 00007f254cba6010 R14: 0000000000000002 R15: fffffffffffff000
[ 63.298282] ? async_page_fault+0x25/0x50
[ 63.299226] </ENTRY_TRAMPOLINE>
[ 63.301758] Kernel Offset: 0xc200000 from 0xffffffff81000000 (relocation ran)
[ 63.304179] ---[ end Kernel panic - not syncing: Machine halted.
--- ../../linux/arch/x86/kvm/x86.c 2018-05-03 18:08:21.145053785 -0500
+++ arch/x86/kvm/x86.c 2018-07-20 09:44:53.309413748 -0500
@@ -2154,6 +2154,86 @@
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}
+#include "../events/perf_event.h"
+#define MAX_PINNED_PAGES 17
+
+/*
+ * Resolve a guest-virtual address to the host struct page backing it.
+ *
+ * @vcpu: vCPU whose walk_mmu performs the GVA -> GPA translation
+ * @addr: guest-virtual address; translated with PFERR_WRITE_MASK access
+ *
+ * Returns the page for the resulting pfn, or NULL when either the
+ * translation yields UNMAPPED_GVA or gfn_to_pfn() reports an error/noslot
+ * pfn.  Each failure kind is logged once.
+ *
+ * NOTE(review): presumably gfn_to_pfn() leaves a reference on the page
+ * that the caller must eventually drop -- confirm against kvm_main.c.
+ */
+static struct page *get_guest_page(struct kvm_vcpu *vcpu,
+				   unsigned long addr)
+{
+	struct x86_exception fault;
+	unsigned long host_pfn;
+	gpa_t guest_pa;
+
+	guest_pa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
+						   PFERR_WRITE_MASK,
+						   &fault);
+	if (guest_pa == UNMAPPED_GVA) {
+		printk_once("Cannot translate guest page %lx\n", addr);
+		return NULL;
+	}
+
+	host_pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(guest_pa));
+	if (!is_error_noslot_pfn(host_pfn))
+		return pfn_to_page(host_pfn);
+
+	printk_once("gfn_to_pfn failed for %llx\n", guest_pa);
+	return NULL;
+}
+
+/*
+ * Look up the guest page containing @addr and copy @len bytes, starting
+ * at @addr's offset inside that page, into @dst.
+ *
+ * @vcpu: vCPU used to translate @addr
+ * @addr: guest-virtual source address
+ * @dst:  destination buffer, at least @len bytes long
+ * @len:  number of bytes to copy; must not run past the end of the page
+ * @p:    out: page returned by get_guest_page(); NULL on any failure
+ *
+ * Returns 0 on success, -EINVAL when the requested range would leave the
+ * single page mapped here, and -EIO when the page cannot be resolved.
+ *
+ * On success *p is left set for the caller to keep; this function does not
+ * release it.
+ */
+static int pin_and_copy(struct kvm_vcpu *vcpu,
+			unsigned long addr, void *dst, int len,
+			struct page **p)
+{
+	unsigned long offset = addr & ~PAGE_MASK;
+	void *map;
+
+	*p = NULL;
+
+	/* Only one page is mapped below; refuse a copy that would leave it. */
+	if (len < 0 || offset + len > PAGE_SIZE)
+		return -EINVAL;
+
+	*p = get_guest_page(vcpu, addr);
+	if (!*p)
+		return -EIO;
+
+	map = kmap(*p);
+	memcpy(dst, map + offset, len);
+	/* kunmap() takes the struct page, not the address kmap() returned. */
+	kunmap(*p);
+	return 0;
+}
+
+/*
+ * Release every page currently recorded in pmu->pinned_pages[] and reset
+ * the bookkeeping so a later pin starts from slot 0.
+ *
+ * NOTE(review): assumes each recorded page carries one reference taken via
+ * gfn_to_pfn() in get_guest_page(); released as dirty because the pages are
+ * pinned as a PEBS write target -- confirm.
+ */
+static void kvm_pmu_pebs_unpin(struct kvm_pmu *pmu)
+{
+	unsigned int nr = min_t(unsigned int, pmu->num_pinned_pages,
+				MAX_PINNED_PAGES);
+	unsigned int i;
+
+	for (i = 0; i < nr; i++) {
+		if (pmu->pinned_pages[i])
+			kvm_release_page_dirty(pmu->pinned_pages[i]);
+		pmu->pinned_pages[i] = NULL;
+	}
+	pmu->num_pinned_pages = 0;
+}
+
+/*
+ * Handle a guest write of @ds_area to MSR_IA32_DS_AREA: pin the guest's
+ * debug-store structure and the PEBS buffer it describes.
+ *
+ * @vcpu:    vCPU that wrote the MSR
+ * @ds_area: guest-virtual address of the guest's struct debug_store;
+ *           0 means "no DS area" and only drops the previous pins
+ *
+ * Slot 0 (and slot 1 when the structure straddles a page boundary) of
+ * pmu->pinned_pages[] hold the debug-store page(s); the remaining slots,
+ * up to MAX_PINNED_PAGES in total, hold PEBS buffer pages.
+ *
+ * pmu->ds_area is only set once the whole debug-store structure has been
+ * pinned and read; on any earlier failure it is left at 0 so that code
+ * keyed on a non-zero pmu->ds_area does not act on an unpinned area.
+ * A failure while pinning PEBS buffer pages keeps the pages pinned so far.
+ */
+static void kvm_pmu_pebs_pin(struct kvm_vcpu *vcpu, u64 ds_area)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct debug_store ds;
+	unsigned long addr;
+	unsigned long offset = ds_area & ~PAGE_MASK;
+	unsigned len = sizeof(struct debug_store);
+	unsigned pg;
+
+	/*
+	 * The guest may rewrite the MSR at any time: drop whatever an earlier
+	 * write pinned, otherwise the pages leak and num_pinned_pages keeps
+	 * growing past the array size.
+	 */
+	kvm_pmu_pebs_unpin(pmu);
+	pmu->ds_area = 0;
+	if (!ds_area)
+		return;
+
+	/* First (possibly only) page of the debug-store structure. */
+	len = min_t(unsigned, PAGE_SIZE - offset, len);
+	if (pin_and_copy(vcpu, ds_area, &ds, len,
+			 &pmu->pinned_pages[0]) < 0) {
+		printk_once("Cannot pin ds area %llx\n", ds_area);
+		return;
+	}
+	pmu->num_pinned_pages++;
+
+	/* Remainder of the structure when it crosses into the next page. */
+	if (len < sizeof(struct debug_store)) {
+		if (pin_and_copy(vcpu, ds_area + len, (void *)&ds + len,
+				 sizeof(struct debug_store) - len,
+				 &pmu->pinned_pages[1]) < 0) {
+			kvm_pmu_pebs_unpin(pmu);
+			return;
+		}
+		pmu->num_pinned_pages++;
+	}
+
+	/* The structure is fully pinned and read: publish the address. */
+	pmu->ds_area = ds_area;
+
+	/*
+	 * Walk the PEBS buffer page by page.  Start from the page-aligned
+	 * base: stepping an unaligned address by PAGE_SIZE can jump over the
+	 * final, partially used page.
+	 */
+	pg = pmu->num_pinned_pages;
+	for (addr = (unsigned long)ds.pebs_buffer_base & PAGE_MASK;
+	     addr < ds.pebs_absolute_maximum && pg < MAX_PINNED_PAGES;
+	     addr += PAGE_SIZE, pg++) {
+		printk("going to pin page at %lx\n", addr);
+		pmu->pinned_pages[pg] = get_guest_page(vcpu, addr);
+		if (!pmu->pinned_pages[pg]) {
+			printk_once("Cannot pin PEBS buffer %lx (%llx-%llx)\n",
+				    addr,
+				    ds.pebs_buffer_base,
+				    ds.pebs_absolute_maximum);
+			break;
+		}
+	}
+	pmu->num_pinned_pages = pg;
+}
+
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
bool pr = false;
@@ -2161,6 +2241,10 @@
u64 data = msr_info->data;
switch (msr) {
+ case MSR_IA32_DS_AREA:
+ pr_info("KVM: guest asked to set MSR_IA32_DS_AREA\n");
+ kvm_pmu_pebs_pin(vcpu, data);
+ break;
case MSR_AMD64_NB_CFG:
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
--- ../../linux/arch/x86/kvm/vmx.c 2018-05-03 18:08:21.141054902 -0500
+++ arch/x86/kvm/vmx.c 2018-07-20 09:45:45.433479581 -0500
@@ -1995,13 +1995,22 @@
return;
}
break;
- case MSR_IA32_PEBS_ENABLE:
- /* PEBS needs a quiescent period after being disabled (to write
- * a record). Disabling PEBS through VMX MSR swapping doesn't
- * provide that period, so a CPU could write host's record into
- * guest's memory.
- */
- wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
for (i = 0; i < m->nr; ++i)
@@ -9274,17 +9286,33 @@
int i, nr_msrs;
struct perf_guest_switch_msr *msrs;
+ if (vmx->vcpu.arch.pmu.ds_area)
+ {
+ add_atomic_switch_msr(vmx, MSR_IA32_DS_AREA,
+ vmx->vcpu.arch.pmu.ds_area,
+ perf_get_ds_area());
+ }
+
msrs = perf_guest_get_msrs(&nr_msrs);
if (!msrs)
return;
for (i = 0; i < nr_msrs; i++)
+ {
+ if (!vmx->vcpu.arch.pmu.ds_area && msrs[i].msr==MSR_IA32_PEBS_ENABLE)
+ {
+ msrs[i].guest = 0;
+ }
if (msrs[i].host == msrs[i].guest)
clear_atomic_switch_msr(vmx, msrs[i].msr);
else
add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
msrs[i].host);
+ }
}
static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
--- ../../linux/arch/x86/include/asm/kvm_host.h 2018-05-03 18:08:21.089069431 -0500
+++ arch/x86/include/asm/kvm_host.h 2018-07-19 16:19:37.535310345 -0500
@@ -399,7 +399,7 @@
struct perf_event *perf_event;
struct kvm_vcpu *vcpu;
};
-
+#define MAX_PINNED_PAGES 17
struct kvm_pmu {
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
@@ -416,6 +416,9 @@
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
u64 reprogram_pmi;
+ u64 ds_area;
+ struct page *pinned_pages[MAX_PINNED_PAGES];
+ unsigned num_pinned_pages;
};
struct kvm_pmu_ops;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment