Skip to content

Instantly share code, notes, and snippets.

@voutilad
Last active June 8, 2020 10:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save voutilad/6e12610390697fd30a649cfdea2d4bf8 to your computer and use it in GitHub Desktop.
Save voutilad/6e12610390697fd30a649cfdea2d4bf8 to your computer and use it in GitHub Desktop.
vmd(8) thread safety patch - solves event base corruption so your non-OpenBSD guests live longer, healthier lives
Index: i8253.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/i8253.c,v
retrieving revision 1.31
diff -u -p -u -p -r1.31 i8253.c
--- i8253.c 30 Nov 2019 00:51:29 -0000 1.31
+++ i8253.c 7 Jun 2020 13:53:52 -0000
@@ -30,6 +30,7 @@
#include "i8253.h"
#include "proc.h"
+#include "vmd.h"
#include "vmm.h"
#include "atomicio.h"
@@ -43,6 +44,40 @@ extern char *__progname;
*/
struct i8253_channel i8253_channel[3];
+static struct vm_dev_pipe dev_pipe;
+
+/*
+ * i8253_pipe_handler
+ *
+ * Reads a message off the pipe, expecting one that corresponds to a
+ * reset request for a specific channel.
+ *
+ */
+static void
+i8253_pipe_dispatch(int fd, short event, void *arg)
+{
+ size_t n;
+ uint8_t msg;
+
+ n = read(fd, &msg, sizeof(msg));
+ if (n != sizeof(msg))
+ fatal("failed to read from device pipe");
+
+ switch (msg) {
+ case I8253_RESET_CHAN_0:
+ i8253_reset(0);
+ break;
+ case I8253_RESET_CHAN_1:
+ i8253_reset(1);
+ break;
+ case I8253_RESET_CHAN_2:
+ i8253_reset(2);
+ break;
+ default:
+ fatal("unknown pipe message %u", msg);
+ }
+}
+
/*
* i8253_init
*
@@ -77,6 +112,9 @@ i8253_init(uint32_t vm_id)
evtimer_set(&i8253_channel[0].timer, i8253_fire, &i8253_channel[0]);
evtimer_set(&i8253_channel[1].timer, i8253_fire, &i8253_channel[1]);
evtimer_set(&i8253_channel[2].timer, i8253_fire, &i8253_channel[2]);
+
+ vm_pipe_init(&dev_pipe, i8253_pipe_dispatch);
+ event_add(&dev_pipe.read_ev, NULL);
}
/*
@@ -271,7 +309,7 @@ vcpu_exit_i8253(struct vm_run_params *vr
sel, i8253_channel[sel].mode,
i8253_channel[sel].start);
- i8253_reset(sel);
+ vm_pipe_send(&dev_pipe, sel);
}
} else {
if (i8253_channel[sel].rbs) {
Index: mc146818.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/mc146818.c,v
retrieving revision 1.21
diff -u -p -u -p -r1.21 mc146818.c
--- mc146818.c 30 Nov 2019 00:51:29 -0000 1.21
+++ mc146818.c 7 Jun 2020 13:53:52 -0000
@@ -63,6 +63,34 @@ struct mc146818 {
struct mc146818 rtc;
+static struct vm_dev_pipe dev_pipe;
+
+static void rtc_reschedule_per(void);
+
+/*
+ * mc146818_pipe_handler
+ *
+ * Reads a message off the pipe, expecting a request to reschedule periodic
+ * interrupt rate.
+ */
+static void
+mc146818_pipe_dispatch(int fd, short event, void *arg)
+{
+ size_t n;
+ uint8_t msg;
+
+ n = read(fd, &msg, sizeof(msg));
+ if (n != sizeof(msg))
+ fatal("failed to read from device pipe");
+
+ if (msg == MC146818_RESCHEDULE_PER) {
+ log_debug("%s: rescheduling periodic timer", __func__);
+ rtc_reschedule_per();
+ } else {
+ fatal("unknown pipe message %u", msg);
+ }
+}
+
/*
* rtc_updateregs
*
@@ -175,6 +203,9 @@ mc146818_init(uint32_t vm_id, uint64_t m
evtimer_add(&rtc.sec, &rtc.sec_tv);
evtimer_set(&rtc.per, rtc_fireper, (void *)(intptr_t)rtc.vm_id);
+
+ vm_pipe_init(&dev_pipe, mc146818_pipe_dispatch);
+ event_add(&dev_pipe.read_ev, NULL);
}
/*
@@ -217,7 +248,7 @@ rtc_update_rega(uint32_t data)
rtc.regs[MC_REGA] = data;
if ((rtc.regs[MC_REGA] ^ data) & 0x0f)
- rtc_reschedule_per();
+ vm_pipe_send(&dev_pipe, MC146818_RESCHEDULE_PER);
}
/*
@@ -240,7 +271,7 @@ rtc_update_regb(uint32_t data)
rtc.regs[MC_REGB] = data;
if (data & MC_REGB_PIE)
- rtc_reschedule_per();
+ vm_pipe_send(&dev_pipe, MC146818_RESCHEDULE_PER);
}
/*
Index: ns8250.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/ns8250.c,v
retrieving revision 1.25
diff -u -p -u -p -r1.25 ns8250.c
--- ns8250.c 11 Dec 2019 06:45:16 -0000 1.25
+++ ns8250.c 7 Jun 2020 13:53:52 -0000
@@ -37,8 +37,40 @@
extern char *__progname;
struct ns8250_dev com1_dev;
+/* Flags to distinguish calling threads to com_rcv */
+#define NS8250_DEV_THREAD 0
+#define NS8250_CPU_THREAD 1
+
+static struct vm_dev_pipe dev_pipe;
+
static void com_rcv_event(int, short, void *);
-static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t);
+static void com_rcv(struct ns8250_dev *, uint32_t, uint32_t, int8_t);
+
+/*
+ * ns8250_pipe_dispatch
+ *
+ * Reads a message off the pipe, expecting a reguest to reset events after a
+ * zero-byte read from the com device.
+ */
+static void
+ns8250_pipe_dispatch(int fd, short event, void *arg)
+{
+ size_t n;
+ uint8_t msg;
+
+ n = read(fd, &msg, sizeof(msg));
+ if (n != sizeof(msg))
+ fatal("failed to read from device pipe");
+
+ if (msg == NS8250_ZERO_READ) {
+ log_debug("%s: resetting events after zero byte read", __func__);
+ event_del(&com1_dev.event);
+ event_add(&com1_dev.wake, NULL);
+ } else {
+ fatal("unknown pipe message %u", msg);
+ }
+}
+
/*
* ratelimit
@@ -55,10 +87,15 @@ static void
ratelimit(int fd, short type, void *arg)
{
/* Set TXRDY and clear "no pending interrupt" */
+ mutex_lock(&com1_dev.mutex);
com1_dev.regs.iir |= IIR_TXRDY;
com1_dev.regs.iir &= ~IIR_NOPEND;
+ mutex_unlock(&com1_dev.mutex);
+
vcpu_assert_pic_irq(com1_dev.vmid, 0, com1_dev.irq);
vcpu_deassert_pic_irq(com1_dev.vmid, 0, com1_dev.irq);
+
+ evtimer_add(&com1_dev.rate, &com1_dev.rate_tv);
}
void
@@ -72,6 +109,7 @@ ns8250_init(int fd, uint32_t vmid)
errno = ret;
fatal("could not initialize com1 mutex");
}
+
com1_dev.fd = fd;
com1_dev.irq = 4;
com1_dev.portid = NS8250_COM1;
@@ -112,6 +150,10 @@ ns8250_init(int fd, uint32_t vmid)
timerclear(&com1_dev.rate_tv);
com1_dev.rate_tv.tv_usec = 10000;
evtimer_set(&com1_dev.rate, ratelimit, NULL);
+ evtimer_add(&com1_dev.rate, &com1_dev.rate_tv);
+
+ vm_pipe_init(&dev_pipe, ns8250_pipe_dispatch);
+ event_add(&dev_pipe.read_ev, NULL);
}
static void
@@ -137,7 +179,7 @@ com_rcv_event(int fd, short kind, void *
if (com1_dev.regs.lsr & LSR_RXRDY)
com1_dev.rcv_pending = 1;
else {
- com_rcv(&com1_dev, (uintptr_t)arg, 0);
+ com_rcv(&com1_dev, (uintptr_t)arg, 0, NS8250_DEV_THREAD);
/* If pending interrupt, inject */
if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
@@ -178,10 +220,10 @@ com_rcv_handle_break(struct ns8250_dev *
* com_rcv
*
* Move received byte into com data register.
- * Must be called with the mutex of the com device acquired
+ * Must be called with the mutex of the com device acquired.
*/
static void
-com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id)
+com_rcv(struct ns8250_dev *com, uint32_t vm_id, uint32_t vcpu_id, int8_t thread)
{
char buf[2];
ssize_t sz;
@@ -201,8 +243,13 @@ com_rcv(struct ns8250_dev *com, uint32_t
if (errno != EAGAIN)
log_warn("unexpected read error on com device");
} else if (sz == 0) {
- event_del(&com->event);
- event_add(&com->wake, NULL);
+ if (thread == NS8250_DEV_THREAD) {
+ event_del(&com->event);
+ event_add(&com->wake, NULL);
+ } else {
+ /* Called by vcpu thread, use pipe for event changes */
+ vm_pipe_send(&dev_pipe, NS8250_ZERO_READ);
+ }
return;
} else if (sz != 1 && sz != 2)
log_warnx("unexpected read return value %zd on com device", sz);
@@ -255,13 +302,12 @@ vcpu_process_com_data(struct vm_exit *ve
com1_dev.byte_out++;
if (com1_dev.regs.ier & IER_ETXRDY) {
- /* Limit output rate if needed */
- if (com1_dev.pause_ct > 0 &&
- com1_dev.byte_out % com1_dev.pause_ct == 0) {
- evtimer_add(&com1_dev.rate,
- &com1_dev.rate_tv);
- } else {
- /* Set TXRDY and clear "no pending interrupt" */
+ /* Set TXRDY and clear "no pending interrupt"
+ * only if we're not at the pause point. Otherwise
+ * the rate limiter timer event will handle it.
+ */
+ if (!(com1_dev.pause_ct > 0 &&
+ com1_dev.byte_out % com1_dev.pause_ct == 0)) {
com1_dev.regs.iir |= IIR_TXRDY;
com1_dev.regs.iir &= ~IIR_NOPEND;
}
@@ -300,7 +346,7 @@ vcpu_process_com_data(struct vm_exit *ve
com1_dev.regs.iir = 0x1;
if (com1_dev.rcv_pending)
- com_rcv(&com1_dev, vm_id, vcpu_id);
+ com_rcv(&com1_dev, vm_id, vcpu_id, NS8250_CPU_THREAD);
}
/* If pending interrupt, make sure it gets injected */
@@ -672,6 +718,7 @@ ns8250_stop()
{
if(event_del(&com1_dev.event))
log_warn("could not delete ns8250 event handler");
+ event_del(&com1_dev.wake);
evtimer_del(&com1_dev.rate);
}
Index: vm.c
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.57
diff -u -p -u -p -r1.57 vm.c
--- vm.c 30 Apr 2020 03:50:53 -0000 1.57
+++ vm.c 7 Jun 2020 13:53:53 -0000
@@ -2243,3 +2243,50 @@ translate_gva(struct vm_exit* exit, uint
return (0);
}
+
+/*
+ * vm_pipe_init
+ *
+ * Initialize a vm_dev_pipe, setting up its file descriptors and its
+ * event structure with the given callback.
+ *
+ * Parameters:
+ * p: pointer to vm_dev_pipe struct to initizlize
+ * cb: callback to use for READ events on the read end of the pipe
+ */
+void
+vm_pipe_init(struct vm_dev_pipe *p, void (*cb)(int, short, void *))
+{
+ int ret;
+ int fds[2];
+
+ memset(p, 0, sizeof(struct vm_dev_pipe));
+
+ ret = pipe(fds);
+ if (ret) {
+ fatal("failed to create vm_dev_pipe pipe");
+ }
+ p->read = fds[0];
+ p->write = fds[1];
+
+ event_set(&p->read_ev, p->read, EV_READ | EV_PERSIST, cb, NULL);
+}
+
+/*
+ * vm_pipe_send
+ *
+ * Send a message to an emulated device vie the provided vm_dev_pipe.
+ * Messages should be of a value from enum pipe_msg_type.
+ *
+ * Parameters:
+ * p: pointer to initialized vm_dev_pipe
+ * msg: message (from pipe_msg_type enum) to send in the channel
+ */
+void
+vm_pipe_send(struct vm_dev_pipe *p, uint8_t msg)
+{
+ size_t n;
+ n = write(p->write, &msg, sizeof(msg));
+ if (n != sizeof(msg))
+ fatal("failed to write to device pipe");
+}
Index: vmd.h
===================================================================
RCS file: /cvs/src/usr.sbin/vmd/vmd.h,v
retrieving revision 1.98
diff -u -p -u -p -r1.98 vmd.h
--- vmd.h 12 Dec 2019 03:53:38 -0000 1.98
+++ vmd.h 7 Jun 2020 13:53:53 -0000
@@ -354,6 +354,20 @@ struct vmd {
int vmd_ptmfd;
};
+struct vm_dev_pipe {
+ int read;
+ int write;
+ struct event read_ev;
+};
+
+enum pipe_msg_type {
+ I8253_RESET_CHAN_0 = 0,
+ I8253_RESET_CHAN_1 = 1,
+ I8253_RESET_CHAN_2 = 2,
+ NS8250_ZERO_READ,
+ MC146818_RESCHEDULE_PER
+};
+
static inline struct sockaddr_in *
ss2sin(struct sockaddr_storage *ss)
{
@@ -442,6 +456,8 @@ int vmm_pipe(struct vmd_vm *, int, void
/* vm.c */
int start_vm(struct vmd_vm *, int);
__dead void vm_shutdown(unsigned int);
+void vm_pipe_init(struct vm_dev_pipe *, void (*)(int, short, void *));
+void vm_pipe_send(struct vm_dev_pipe *, uint8_t);
/* control.c */
int config_init(struct vmd *);
@voutilad
Copy link
Author

voutilad commented Jun 4, 2020

This version uses pipes to form simple channels between threads ensuring that only the event loop thread calls CUD operations on the event base. Should prevent timeheap corruption and segfaults.

Doesn't reliable pause/unpause with multiple guests for some reason...needs more debugging.

@voutilad
Copy link
Author

voutilad commented Jun 4, 2020

Now no more event mutexes that do effectively nothing in terms of protection since they can't lock inside the event loop itself.

Fixes a startup issue using rc(8) as well.

@voutilad
Copy link
Author

voutilad commented Jun 5, 2020

TODO:

  • fix some doc mistakes
  • some whitespace issues
  • figure out if my vcpu_assert_pic_irq/vcpu_deassert_pic_irq VM paused state check is sound or subject to race conditions (will the vm state change between entry of unpause vm function)

@voutilad
Copy link
Author

voutilad commented Jun 6, 2020

Fixed some whitespace and doc stuff.

Pausing under load where the guest CPU is 100% in userland and waiting on a timer just doesn't work yet, even in existing vmd(8). We need a way to properly interrupt the guest. In some hypervisors, there's guest support for officially "pausing", but there's supposed to be vmx/svm support for a guest pause register I believe. (Saw something in intel's docs on vmx...and I think KVM uses it?)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment