Skip to content

Instantly share code, notes, and snippets.

@mgerdts
Last active February 22, 2018 05:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgerdts/9d1e276d0a6450fcd7b839ce72c2abfd to your computer and use it in GitHub Desktop.
Save mgerdts/9d1e276d0a6450fcd7b839ce72c2abfd to your computer and use it in GitHub Desktop.
false sharing

Baseline

  • Non-debug
  • dev-bhyve branch with this as last commit:
    commit dc994c4720f1b6cbd88011c5de076e580a8deeab
    Author: Mike Gerdts <mike.gerdts@joyent.com>
    Date:   Thu Feb 15 21:40:29 2018 +0000
    
        OS-6558 cleanup to match master push
    
  • Platform Image:
    $ mls /mgerdts/public/bhyve-20180221-nd
    platform-20180214T165906Z.iso
    platform-20180214T165906Z.tgz
    platform-20180214T165906Z.usb.bz2
    
  • Guest running Ubuntu 17.10, 16 vcpu, 48 GB RAM
$ cd /builds/linux
$ make -j 32 clean; time make -j 32
...
real	11m32.055s
user	139m2.487s
sys	20m27.720s

struct vcpu alignment

Same as above, but every struct vcpu is guaranteed to be aligned.

commit 442f94ec0bd6cdeaefbdda627f08036be4378849
Author: Mike Gerdts <mike.gerdts@joyent.com>
Date:   Thu Feb 22 04:10:22 2018 +0000

    avoid false sharing in vcpu array

diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 0e55799..7dd48be 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -124,6 +124,7 @@ struct vcpu {
        void            *stats;         /* (a,i) statistics */
        struct vm_exit  exitinfo;       /* (x) exit reason and collateral */
        uint64_t        nextrip;        /* (x) next instruction to execute */
+       uint64_t        pad;            /* avoid false sharing */
 };
 
 #define        vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -180,7 +181,7 @@ struct vm {
        struct mem_seg  mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
        struct vmspace  *vmspace;               /* (o) guest's address space */
        char            name[VM_MAX_NAMELEN];   /* (o) virtual machine name */
-       struct vcpu     vcpu[VM_MAXCPU];        /* (i) guest vcpus */
+       struct vcpu     *vcpu;                  /* (i) guest vcpus */
 #ifndef __FreeBSD__
        krwlock_t       ioport_rwlock;
        list_t          ioport_hooks;
@@ -306,6 +307,9 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
 
        vcpu = &vm->vcpu[vcpu_id];
 
+       KASSERT(((uintptr_t)vcpu & (CPU_CACHE_COHERENCE_SIZE - 1)) == 0,
+           ("vcpu_init: cpu %d not aligned on cache line", vcpu_id));
+
        if (create) {
 #ifdef __FreeBSD__
                KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
@@ -504,6 +508,10 @@ vm_init(struct vm *vm, bool create)
        vm->suspend = 0;
        CPU_ZERO(&vm->suspended_cpus);
 
+       if (create) {
+               vm->vcpu = kmem_zalloc(VM_MAXCPU * sizeof (*vm->vcpu),
+                   KM_SLEEP);
+       }
        for (i = 0; i < VM_MAXCPU; i++)
                vcpu_init(vm, i, create);
 }

Verified with:

[root@emy-17 /root]# mdb -k
Loading modules: [ unix genunix specfs dtrace mac cpu.generic uppc apix scsi_vhci ufs vmm ip hook neti sockfs arp usba xhci stmf_sbd stmf zfs mm lofs idm sata crypto random cpc logindmux ptm kvm sppp nsmb smbsrv nfs ]
> ::sizeof struct vcpu
sizeof (struct vcpu) = 0x100
> ::sizeof struct vcpu |=D
                256             
$ cd /builds/linux
$ make -j 32 clean; time make -j 32
...
real	11m9.153s
user	138m59.443s
sys	20m15.444s

Strategies

In svm_softc.h I found something that makes alignment on page boundaries easy. We could do the same for cache line boundaries too.

struct svm_vcpu {
        struct vmcb     vmcb;    /* hardware saved vcpu context */
        struct svm_regctx swctx; /* software saved vcpu context */
        uint64_t        vmcb_pa; /* VMCB physical address */
        uint64_t        nextrip; /* next instruction to be executed by guest */
        int             lastcpu; /* host cpu that the vcpu last ran on */
        uint32_t        dirty;   /* state cache bits that must be cleared */
        long            eptgen;  /* pmap->pm_eptgen when the vcpu last ran */
        struct asid     asid;
} __aligned(PAGE_SIZE);

In mdb we can see that almost an entire page of pad is added.

> ::sizeof struct svm_vcpu    
sizeof (struct svm_vcpu) = 0x2000
> ::print -ta struct svm_vcpu
0 struct svm_vcpu {
    0 struct vmcb vmcb {
...
    1090 struct asid asid {
        1090 uint64_t gen 
        1098 uint32_t num 
    }
}

usr/src/cmd/bhyve/bhyverun.c

This only includes the items that are not guarded by #ifdef __FreeBSD__ — that is, the code paths compiled on illumos.

vmexit array

struct vm_exit should be padded from 136 to 192 bytes — three full 64-byte cache lines — by adding uint64_t pad[7] (136 + 7 * 8 = 192).

static struct vm_exit vmexit[VM_MAXCPU];                                        
> ::sizeof struct vm_exit
sizeof (struct vm_exit) = 0x88
> ::sizeof struct vm_exit |=D
                136             

mt_vmm_info array

Ugh. We need 40 bytes of pad on a 24 byte structure just to round it up to one 64-byte cache line (24 + 40 = 64). Maybe this structure could be combined with something else instead.

struct mt_vmm_info {
        pthread_t       mt_thr;
        struct vmctx    *mt_ctx;
        int             mt_vcpu;        
} mt_vmm_info[VM_MAXCPU];
> ::sizeof struct mt_vmm_info
sizeof (struct mt_vmm_info) = 0x18
> ::sizeof struct mt_vmm_info |=D
                24              

struct vmx

/* virtual machine softc */
struct vmx {
        struct vmcs     vmcs[VM_MAXCPU];        /* one vmcs per virtual cpu */
        struct apic_page apic_page[VM_MAXCPU];  /* one apic page per vcpu */
        char            msr_bitmap[PAGE_SIZE];
        struct pir_desc pir_desc[VM_MAXCPU];
        uint64_t        guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
#ifndef __FreeBSD__
        uint64_t        host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
        uint64_t        tsc_offset[VM_MAXCPU];
#endif
        struct vmxctx   ctx[VM_MAXCPU];
        struct vmxcap   cap[VM_MAXCPU];
        struct vmxstate state[VM_MAXCPU];
        uint64_t        eptp;
        struct vm       *vm;
        long            eptgen[MAXCPU];         /* cached pmap->pm_eptgen */
};

We can see here that each vcpu's guest_msrs and host_msrs share cache lines with neighboring vcpus.

> ::print -ta struct vmx
0 struct vmx {
    0 struct vmcs [16] vmcs 
    20000 struct apic_page [16] apic_page 
    30000 char [4096] msr_bitmap 
    31000 struct pir_desc [16] pir_desc 
    31400 uint64_t [16][6] guest_msrs 
    31700 uint64_t [16][6] host_msrs 
    31a00 uint64_t [16] tsc_offset 
    31a80 struct vmxctx [16] ctx 
    32380 struct vmxcap [16] cap 
    32440 struct vmxstate [16] state 
    32540 uint64_t eptp 
    32548 struct vm *vm 
    32550 long [256] eptgen 
}

This could probably be greatly improved as:

struct vmx_vcpu {
        struct vmcs     vmcs;        /* one vmcs per virtual cpu */
        struct apic_page apic_page;  /* one apic page per vcpu */
        struct pir_desc pir_desc;
        uint64_t        guest_msrs[GUEST_MSR_NUM];
#ifndef __FreeBSD__
        uint64_t        host_msrs[GUEST_MSR_NUM];
        uint64_t        tsc_offset;
#endif
        struct vmxctx   ctx;
        struct vmxcap   cap;
        struct vmxstate state;
        long            eptgen;         /* cached pmap->pm_eptgen */
} __aligned(CPU_CACHE_COHERENCE_SIZE);

struct vmx {
        char            msr_bitmap[PAGE_SIZE];
        struct vmx_vcpu vmx_vcpu[VM_MAXCPU];
        uint64_t        eptp;
        struct vm       *vm;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment