Skip to content

Instantly share code, notes, and snippets.

@gnif
Created October 7, 2018 00:11
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gnif/e4c001b608347b0b86118a2647103378 to your computer and use it in GitHub Desktop.
Save gnif/e4c001b608347b0b86118a2647103378 to your computer and use it in GitHub Desktop.
Nasty/hacky QEMU patch to enable PCIe x 16 Gen 3.0 for Qemu VFIO
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6c91bd44a0..f3c7b9d328 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -39,6 +39,166 @@
#define PCIE_DEV_PRINTF(dev, fmt, ...) \
PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
+static uint16_t pcie_link_max_width(PCIDevice *dev)
+{
+ uint8_t *lnkcap_reg = dev->config + dev->exp.exp_cap + PCI_EXP_LNKCAP;
+
+ /*
+ * Keep only the Maximum Link Width bits of LNKCAP. The field is not
+ * shifted down, so the value stays in its in-register bit position.
+ */
+ return pci_get_long(lnkcap_reg) & PCI_EXP_LNKCAP_MLW;
+}
+
+static uint16_t pcie_link_current_width(PCIDevice *dev)
+{
+ uint8_t *lnksta_reg = dev->config + dev->exp.exp_cap + PCI_EXP_LNKSTA;
+
+ /*
+ * The LNKCAP width mask is applied to LNKSTA here, which presumes the
+ * width field sits at the same bit position in both registers.
+ */
+ return pci_get_word(lnksta_reg) & PCI_EXP_LNKCAP_MLW;
+}
+
+static uint8_t pcie_link_speed_mask(PCIDevice *dev)
+{
+ uint8_t *cap = dev->config + dev->exp.exp_cap;
+ uint32_t lnkcap = pci_get_long(cap + PCI_EXP_LNKCAP);
+ uint16_t version = pci_get_word(cap + PCI_EXP_FLAGS) & PCI_EXP_FLAGS_VERS;
+ uint8_t up_to_max, vector = 0;
+
+ /*
+ * One bit per speed code from 1 up to the LNKCAP maximum: the device is
+ * assumed to support all lower speeds (only really valid for 2.5 & 5GT/s).
+ */
+ up_to_max = (1 << (lnkcap & PCI_EXP_LNKCAP_SLS)) - 1;
+
+ /*
+ * LNKCAP2 is consulted only for capability version 2 or later, and only
+ * when its offset lies below PCI_CONFIG_SPACE_SIZE. Bit 0 of the
+ * register is dropped so the vector lines up with up_to_max.
+ */
+ if (version >= PCI_EXP_FLAGS_VER2 &&
+ dev->exp.exp_cap + PCI_EXP_LNKCAP2 < PCI_CONFIG_SPACE_SIZE) {
+ uint32_t lnkcap2 = pci_get_long(cap + PCI_EXP_LNKCAP2);
+
+ vector = (lnkcap2 & PCI_EXP_LINKCAP2_SLSV) >> 1;
+ }
+
+ /*
+ * A non-empty vector is authoritative but is still capped at the LNKCAP
+ * maximum; an empty one leaves only the LNKCAP-derived mask.
+ */
+ return vector ? (vector & up_to_max) : up_to_max;
+}
+
+static uint8_t pcie_link_current_speed(PCIDevice *dev)
+{
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+ /* 16-bit read to match the variable and pcie_link_current_width(). */
+ uint16_t lnksta = pci_get_word(exp_cap + PCI_EXP_LNKSTA);
+
+ /* A zero speed field yields 0, i.e. no current speed is reported. */
+ if (!(lnksta & PCI_EXP_LNKCAP_SLS)) {
+ return 0;
+ }
+
+ /* Turn the 1-based speed code into a one-hot bit (code 1 -> bit 0). */
+ return 1 << ((lnksta & PCI_EXP_LNKCAP_SLS) - 1);
+}
+
+/*
+ * Negotiate the upstream link for PCIDevice @dev setting both the upstream
+ * and downstream LNKSTA. If @dev already reports link width and/or speed
+ * in LNKSTA they will be used as the preferred link parameters. LNKSTA
+ * is always set, using the preferred parameters if possible, followed by
+ * the best available link, followed by unknown (0) if an accurate value
+ * is not possible. The caller can read LNKSTA from @dev to determine the
+ * resulting link parameters.
+ */
+void pcie_negotiate_link(PCIDevice *dev)
+{
+ PCIDevice *parent;
+ uint16_t flags, width = 0;
+ uint8_t type, speed = 0;
+ PCIBus *bus = pci_get_bus(dev);
+
+ /* Skip non-express buses and Root Complex buses. */
+ if (!pci_bus_is_express(bus) || pci_bus_is_root(bus)) {
+ goto unknown;
+ }
+
+ /*
+ * Downstream ports don't negotiate with upstream ports, their link
+ * is negotiated by whatever is attached downstream to them. The
+ * same is true of root ports, but root ports are always attached to
+ * the root complex, so fall out above.
+ */
+ flags = pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_FLAGS);
+ type = (flags & PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
+ if (type == PCI_EXP_TYPE_DOWNSTREAM) {
+ goto unknown;
+ }
+
+ /*
+ * Multifunction devices don't negotiate independent speeds, let
+ * function 0 do the negotiation and copy the results.
+ */
+ if (PCI_FUNC(dev->devfn)) {
+ PCIDevice *sibling;
+ uint16_t val;
+
+ sibling = pci_find_device(bus, pci_bus_num(bus),
+ PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ if (!sibling || !pci_is_express(sibling) || !sibling->exp.exp_cap) {
+ goto unknown;
+ }
+
+ val = pci_get_word(sibling->config + sibling->exp.exp_cap +
+ PCI_EXP_LNKSTA);
+
+ pci_set_word_by_mask(dev->config + dev->exp.exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS, val);
+ return;
+ }
+
+ parent = bus->parent_dev;
+
+ assert(pci_is_express(dev) && dev->exp.exp_cap &&
+ pci_is_express(parent) && parent->exp.exp_cap);
+
+ /*
+ * If LNKSTA reports a current width/speed and those values are actually
+ * compatible with the device as reported by LNKCAP, use them as the
+ * target parameters. If the target values are incompatible, fall back
+ * to regular negotiation.
+ */
+ if (pcie_link_current_width(dev) &&
+ pcie_link_current_width(dev) <= pcie_link_max_width(dev) &&
+ pcie_link_current_width(dev) <= pcie_link_max_width(parent)) {
+ width = pcie_link_current_width(dev);
+ } else {
+ width = MIN(pcie_link_max_width(dev), pcie_link_max_width(parent));
+ }
+ /* 32 - clz32(m) is the 1-based position of the highest set bit in m. */
+ if (pcie_link_current_speed(dev) & pcie_link_speed_mask(dev)) {
+ speed = 32 - clz32(pcie_link_current_speed(dev) &
+ pcie_link_speed_mask(parent));
+ }
+ if (!speed) {
+ speed = 32 - clz32(pcie_link_speed_mask(dev) &
+ pcie_link_speed_mask(parent));
+ }
+ /* Record the result on the parent first, then fall through for @dev. */
+ pci_set_word_by_mask(parent->config + parent->exp.exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS,
+ width | speed);
+unknown:
+ pci_set_word_by_mask(dev->config + dev->exp.exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS,
+ width | speed);
+}
/***************************************************************************
* pci express capability helper functions
@@ -68,11 +228,11 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version)
pci_set_long(exp_cap + PCI_EXP_LNKCAP,
(port << PCI_EXP_LNKCAP_PN_SHIFT) |
PCI_EXP_LNKCAP_ASPMS_0S |
- PCI_EXP_LNK_MLW_1 |
- PCI_EXP_LNK_LS_25);
+ PCI_EXP_LNK_MLW_16 |
+ PCI_EXP_LNK_LS_80);
pci_set_word(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+ PCI_EXP_LNK_MLW_16 | PCI_EXP_LNK_LS_80);
if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
@@ -111,6 +271,15 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
+ pci_set_long(exp_cap + PCI_EXP_LNKCAP,
+ (port << PCI_EXP_LNKCAP_PN_SHIFT) |
+ PCI_EXP_LNKCAP_ASPMS_0S |
+ PCI_EXP_LNK_MLW_16 |
+ PCI_EXP_LNK_LS_80);
+
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_ASPMS_L0S);
+
pci_set_word(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_EETLPPB);
if (dev->cap_present & QEMU_PCIE_EXTCAP_INIT) {
@@ -118,6 +287,23 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
pci_set_long(dev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
}
+ if (type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, PCI_EXP_LNKCAP_LBNC);
+ }
+
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+ PCI_EXP_LNK2_LS_25 |
+ PCI_EXP_LNK2_LS_50 |
+ PCI_EXP_LNK2_LS_80);
+
+ if (type == PCI_EXP_TYPE_DOWNSTREAM) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_DLLLARC);
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
+
+ pcie_negotiate_link(dev);
return pos;
}
@@ -537,7 +723,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
hotplug_event_notify(dev);
- /*
+ /*
* 6.7.3.2 Command Completed Events
*
* Software issues a command to a hot-plug capable Downstream Port by
@@ -763,4 +949,4 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset)
pci_set_word(dev->config + offset + PCI_ATS_CTRL, 0);
pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
-}
+}
\ No newline at end of file
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6cbb8fa054..523bc81932 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1886,6 +1886,7 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
}
} else {
+#if 0
/*
* Convert Root Complex Integrated Endpoints to regular endpoints.
* These devices don't support LNK/LNK2 capabilities, so make them up.
@@ -1904,6 +1905,7 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
pci_get_word(vdev->pdev.config + pos +
PCI_EXP_LNKSTA),
PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
+#endif
}
/*
@@ -3239,4 +3241,4 @@ static void register_vfio_pci_dev_type(void)
type_register_static(&vfio_pci_dev_info);
}
-type_init(register_vfio_pci_dev_type)
+type_init(register_vfio_pci_dev_type)
\ No newline at end of file
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index a95522a13b..bda852760f 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -35,13 +35,23 @@
/* PCI_EXP_LINK{CAP, STA} */
/* link speed */
#define PCI_EXP_LNK_LS_25 1
+#define PCI_EXP_LNK_LS_50 2
+#define PCI_EXP_LNK_LS_80 3
#define PCI_EXP_LNK_MLW_SHIFT ctz32(PCI_EXP_LNKCAP_MLW)
-#define PCI_EXP_LNK_MLW_1 (1 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_1 (1 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_2 (2 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_4 (4 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_8 (8 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_12 (12 << PCI_EXP_LNK_MLW_SHIFT)
+#define PCI_EXP_LNK_MLW_16 (16 << PCI_EXP_LNK_MLW_SHIFT)
/* PCI_EXP_LINKCAP */
#define PCI_EXP_LNKCAP_ASPMS_SHIFT ctz32(PCI_EXP_LNKCAP_ASPMS)
#define PCI_EXP_LNKCAP_ASPMS_0S (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
+#define PCI_EXP_LNKCAP_ASPMS_L0S (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
+#define PCI_EXP_LNKCAP_ASPMS_L1 (2 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
+#define PCI_EXP_LNKCAP_ASPMS_L0SL1 (3 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
#define PCI_EXP_LNKCAP_PN_SHIFT ctz32(PCI_EXP_LNKCAP_PN)
@@ -75,6 +85,13 @@
#define PCI_EXP_DEVCTL2_EETLPPB 0x8000
+#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */
+#define PCI_EXP_LINKCAP2_SLSV 0x000000fe /* Supported Link Speeds Vector */
+#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */
+#define PCI_EXP_LNK2_LS_25 (1 << 1) /* SLSV bit: 2.5GT/s supported */
+#define PCI_EXP_LNK2_LS_50 (1 << 2) /* SLSV bit: 5.0GT/s supported */
+#define PCI_EXP_LNK2_LS_80 (1 << 3) /* SLSV bit: 8.0GT/s supported */
+
/* ARI */
#define PCI_ARI_VER 1
#define PCI_ARI_SIZEOF 8
@@ -156,4 +173,4 @@
PCI_ERR_COR_INTERNAL | \
PCI_ERR_COR_HL_OVERFLOW)
-#endif /* QEMU_PCIE_REGS_H */
+#endif /* QEMU_PCIE_REGS_H */
\ No newline at end of file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment