Skip to content

Instantly share code, notes, and snippets.

@micchie
Last active April 13, 2021 16:21
Show Gist options
  • Save micchie/90585a656bb313f1015d7f6fb5c40fa3 to your computer and use it in GitHub Desktop.
Save micchie/90585a656bb313f1015d7f6fb5c40fa3 to your computer and use it in GitHub Desktop.
netmap: recipe for i40e checksum offload (csum_done before fdtable_add)
From cce94330056edd64e0247a9a3445244d2c0ba032 Mon Sep 17 00:00:00 2001
From: Michio Honda <michio.honda@ed.ac.uk>
Date: Tue, 13 Apr 2021 17:18:52 +0100
Subject: [PATCH] Add support for i40e hwcsum
---
LINUX/i40e_netmap_linux.h | 57 +++++++++++++++++++++++++++++++++++
sys/dev/netmap/netmap_kern.h | 1 +
sys/dev/netmap/netmap_paste.c | 13 ++++++++
3 files changed, 71 insertions(+)
diff --git a/LINUX/i40e_netmap_linux.h b/LINUX/i40e_netmap_linux.h
index 0f6018888..586a4b4aa 100644
--- a/LINUX/i40e_netmap_linux.h
+++ b/LINUX/i40e_netmap_linux.h
@@ -280,6 +280,7 @@ i40e_netmap_attach(struct i40e_vsi *vsi)
na.ifp = vsi->netdev;
na.pdev = &vsi->back->pdev->dev;
na.na_flags = NAF_MOREFRAG | NAF_OFFSETS;
+ na.na_flags |= NAF_CSUM;
na.num_tx_desc = NM_I40E_TX_RING(vsi, 0)->count;
na.num_rx_desc = NM_I40E_RX_RING(vsi, 0)->count;
na.num_tx_rings = na.num_rx_rings = vsi->num_queue_pairs;
@@ -427,6 +428,16 @@ i40e_netmap_txsync(struct netmap_kring *kring, int flags)
/* Fill the slot in the NIC ring.
* (we should investigate if using legacy descriptors
* is faster). */
+#ifdef WITH_PASTE
+ if (slot->flags & NS_CSUM) {
+ u32 cmd = NMCB_BUF(NMB(na, slot))->cmd;
+ u32 off = NMCB_BUF(NMB(na, slot))->off;
+ hw_flags |=
+ ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT) |
+ ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT);
+ slot->flags &= ~NS_CSUM;
+ }
+#endif /* WITH_PASTE */
curr->buffer_addr = htole64(paddr + offset);
curr->cmd_type_offset_bsz = htole64(
((u64)len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
@@ -474,6 +485,49 @@ i40e_netmap_txsync(struct netmap_kring *kring, int flags)
return 0;
}
+static inline int
+i40e_netmap_rx_checksum(uint64_t qword, uint32_t rx_status)
+{
+ bool ipv4, ipv6;
+ struct i40e_rx_ptype_decoded decoded;
+ u8 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
+ >> I40E_RXD_QW1_PTYPE_SHIFT;
+ u32 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK)
+ >> I40E_RXD_QW1_ERROR_SHIFT;
+ /*
+ * Classify the hardware checksum verdict for one RX descriptor.
+ *
+ * qword:     descriptor word from which the packet-type (ptype) and
+ *            error (rx_error) fields are extracted above.
+ * rx_status: status bits; only L3L4P and IPV6EXADD are tested here.
+ *
+ * Returns  1 if no early exit below triggers and the decoded inner
+ *            protocol is TCP, UDP or SCTP;
+ *         -1 if an IPv4 header error (IPE/EIPE) or an L4 error (L4E)
+ *            is flagged;
+ *          0 in every other case (no verdict is reported).
+ * The rxsync hunk of this patch only tests the result for == 1.
+ */
+ if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+ return 0; /* L3L4P status bit not set: nothing to report */
+ decoded = decode_rx_desc_ptype(ptype);
+ if (!(decoded.known && decoded.outer_ip))
+ return 0; /* the checks below need both known and outer_ip set */
+ ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
+ if (ipv4 &&
+ (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
+ BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+ return -1; /* IPv4 with IPE or EIPE error bit set */
+ if (ipv6 &&
+ rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) /* '&' binds tighter than '&&' */
+ return 0; /* IPv6 with IPV6EXADD status: no verdict, not an error */
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
+ return -1; /* L4E error bit set */
+ if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
+ return 0; /* PPRS error bit set: no verdict, not treated as failure */
+ if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+ return 0; /* NOTE(review): presumably where an skb path would set skb->csum_level = 1; here tunnelled frames get no verdict - confirm */
+ switch (decoded.inner_prot) {
+ case I40E_RX_PTYPE_INNER_PROT_TCP:
+ case I40E_RX_PTYPE_INNER_PROT_UDP:
+ case I40E_RX_PTYPE_INNER_PROT_SCTP:
+ return 1; /* CHECKSUM_UNNECESSARY */
+ /* not reached: the cases above return, there is no fall-through */
+ default:
+ break;
+ }
+ return 0; /* any other inner protocol: no verdict */
+}
/*
* Reconcile kernel and user view of the receive ring.
@@ -579,6 +633,9 @@ i40e_netmap_rxsync(struct netmap_kring *kring, int flags)
netmap_sync_map_cpu(na, (bus_dma_tag_t) na->pdev,
&paddr, slot->len, NR_RX);
+ if ((ifp->features & NETIF_F_RXCSUM) &&
+ (i40e_netmap_rx_checksum(qword, staterr) == 1))
+ slot->flags |= NS_CSUM;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index bc8637849..8b1548215 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -789,6 +789,7 @@ struct netmap_adapter {
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_OFFSETS 1024 /* the adapter supports the slot offsets */
#define NAF_HOST_ALL 2048 /* the adapter wants as many host rings as hw */
+#define NAF_CSUM 4096 /* the adapter supports checksum offload */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
diff --git a/sys/dev/netmap/netmap_paste.c b/sys/dev/netmap/netmap_paste.c
index ebf474db9..d6de5da6b 100644
--- a/sys/dev/netmap/netmap_paste.c
+++ b/sys/dev/netmap/netmap_paste.c
@@ -892,6 +892,12 @@ netmap_pst_transmit(struct ifnet *ifp, struct mbuf *m)
iph = (struct nm_iphdr *)(nmb + v + MBUF_NETWORK_OFFSET(m));
tcph = (struct nm_tcphdr *)(nmb + v + MBUF_TRANSPORT_OFFSET(m));
#ifdef linux
+ if (na->na_flags & NAF_CSUM) {
+ if (likely(!csum_ctx(&cb->cmd, &cb->off, iph, tcph))) {
+ slot->flags |= NS_CSUM;
+ goto csum_done;
+ }
+ }
m->ip_summed = CHECKSUM_COMPLETE;
#endif
check = &tcph->check;
@@ -899,6 +905,7 @@ netmap_pst_transmit(struct ifnet *ifp, struct mbuf *m)
len = slot->len - MBUF_TRANSPORT_OFFSET(m);
nm_os_csum_tcpudp_ipv4(iph, tcph, len, check);
}
+csum_done:
pst_fdtable_add(cb, kring);
/* the stack might hold reference via clone, so let's see */
@@ -1154,6 +1161,12 @@ netmap_pst_bwrap_reg(struct netmap_adapter *na, int onoff)
PST_DBG("%s: only one NIC is supported", na->name);
return ENOTSUP;
}
+ if (hwna->na_flags & NAF_CSUM) {
+ struct netmap_adapter *mna = pst_na(na);
+ if (!mna)
+ panic("x");
+ mna->na_flags |= NAF_CSUM;
+ }
/* netmap_do_regif just created rings. As we cannot rely on
* netmap_offsets_init, we set offsets here.
*/
--
2.17.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment