Created May 10, 2017 14:56
-
-
Save ashwinyes/34cbdd999784402c859c71613587fafc to your computer and use it in GitHub Desktop.
Changes on top of http://dpdk.org/dev/patchwork/patch/24184/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 1735d691e5b5d7fb3f0d2bcd6d1fbb9f7712d7af Mon Sep 17 00:00:00 2001 | |
From: Ashwin Sekhar T K <ashwin.sekhar@caviumnetworks.com> | |
Date: Wed, 10 May 2017 14:24:03 +0000 | |
Subject: [PATCH] Custom changes on top of l3fwd patch v2 | |
--- | |
examples/l3fwd/l3fwd_em_hlm.h | 2 - | |
examples/l3fwd/l3fwd_em_sequential.h | 2 - | |
examples/l3fwd/l3fwd_lpm_neon.h | 87 +++++++++++++++++++++++------------- | |
examples/l3fwd/l3fwd_neon.h | 2 +- | |
4 files changed, 57 insertions(+), 36 deletions(-) | |
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h | |
index 10a9c95..098b396 100644 | |
--- a/examples/l3fwd/l3fwd_em_hlm.h | |
+++ b/examples/l3fwd/l3fwd_em_hlm.h | |
@@ -170,7 +170,6 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, | |
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT); | |
for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) { | |
- rte_prefetch0(pkts_burst[j]); | |
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], | |
struct ether_hdr *) + 1); | |
} | |
@@ -189,7 +188,6 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, | |
for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT; | |
i < EM_HASH_LOOKUP_COUNT && pos < nb_rx; i++, pos++) { | |
- rte_prefetch0(pkts_burst[pos]); | |
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[pos], | |
struct ether_hdr *) + 1); | |
} | |
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h | |
index 63c5c12..c7d477d 100644 | |
--- a/examples/l3fwd/l3fwd_em_sequential.h | |
+++ b/examples/l3fwd/l3fwd_em_sequential.h | |
@@ -109,14 +109,12 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, | |
uint16_t dst_port[MAX_PKT_BURST]; | |
if (nb_rx > 0) { | |
- rte_prefetch0(pkts_burst[0]); | |
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[0], | |
struct ether_hdr *) + 1); | |
} | |
for (i = 1, j = 0; j < nb_rx; i++, j++) { | |
if (i < nb_rx) { | |
- rte_prefetch0(pkts_burst[i]); | |
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], | |
struct ether_hdr *) + 1); | |
} | |
diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h | |
index 2f047b3..375101b 100644 | |
--- a/examples/l3fwd/l3fwd_lpm_neon.h | |
+++ b/examples/l3fwd/l3fwd_lpm_neon.h | |
@@ -117,46 +117,71 @@ static inline void | |
l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst, | |
uint8_t portid, struct lcore_conf *qconf) | |
{ | |
- int32_t i, j, pos; | |
+ int32_t j = 0, i = 0; | |
uint16_t dst_port[MAX_PKT_BURST]; | |
- int32x4_t dip[MAX_PKT_BURST / FWDSTEP]; | |
- uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP]; | |
+ int32x4_t dip; | |
+ uint32_t ipv4_flag; | |
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP); | |
+ const int32_t m = nb_rx % FWDSTEP; | |
- for (j = 0; j < FWDSTEP && j < nb_rx; j++) { | |
- rte_prefetch0(pkts_burst[j]); | |
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], | |
- struct ether_hdr *) + 1); | |
- } | |
+ if (k) { | |
+ for (i = 0; i < FWDSTEP; i++) { | |
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], | |
+ struct ether_hdr *)); | |
+ } | |
- for (j = 0; j != k; j += FWDSTEP) { | |
- for (i = 0, pos = j + FWDSTEP; i < FWDSTEP && pos < nb_rx; | |
- i++, pos++) { | |
- rte_prefetch0(pkts_burst[pos]); | |
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[pos], | |
- struct ether_hdr *) + 1); | |
+ for (j = 0; j != k - FWDSTEP; j += FWDSTEP) { | |
+ for (i = 0; i < FWDSTEP; i++) { | |
+ rte_prefetch0(rte_pktmbuf_mtod( | |
+ pkts_burst[j + i + FWDSTEP], | |
+ struct ether_hdr *)); | |
+ } | |
+ processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); | |
+ processx4_step2(qconf, dip, | |
+ ipv4_flag, portid, &pkts_burst[j], | |
+ &dst_port[j]); | |
} | |
- processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP], | |
- &ipv4_flag[j / FWDSTEP]); | |
- processx4_step2(qconf, dip[j / FWDSTEP], | |
- ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], | |
+ processx4_step1(&pkts_burst[j], &dip, &ipv4_flag); | |
+ processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j], | |
&dst_port[j]); | |
+ j += FWDSTEP; | |
} | |
- /* Classify last up to 3 packets one by one */ | |
- switch (nb_rx % FWDSTEP) { | |
- case 3: | |
- dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); | |
- j++; | |
- /* fallthrough */ | |
- case 2: | |
- dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); | |
- j++; | |
- /* fallthrough */ | |
- case 1: | |
- dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid); | |
- j++; | |
+ if (m) { | |
+ /* Prefetch last up to 3 packets one by one */ | |
+ switch (m) { | |
+ case 3: | |
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); | |
+ j++; | |
+ /* fallthrough */ | |
+ case 2: | |
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); | |
+ j++; | |
+ /* fallthrough */ | |
+ case 1: | |
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *)); | |
+ j++; | |
+ } | |
+ | |
+ j -= m; | |
+ /* Classify last up to 3 packets one by one */ | |
+ switch (m) { | |
+ case 3: | |
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], | |
+ portid); | |
+ j++; | |
+ /* fallthrough */ | |
+ case 2: | |
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], | |
+ portid); | |
+ j++; | |
+ /* fallthrough */ | |
+ case 1: | |
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], | |
+ portid); | |
+ j++; | |
+ } | |
} | |
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx); | |
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h | |
index 75c8976..fe351db 100644 | |
--- a/examples/l3fwd/l3fwd_neon.h | |
+++ b/examples/l3fwd/l3fwd_neon.h | |
@@ -145,7 +145,7 @@ process_packet(struct rte_mbuf *pkt, uint16_t *dst_port) | |
rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port, | |
pkt->packet_type); | |
- ve = vsetq_lane_u32(vgetq_lane_u32(te, 3), ve, 3); | |
+ ve = vcopyq_laneq_u32(ve, 3, te, 3); | |
vst1q_u32((uint32_t *)eth_hdr, ve); | |
} | |
-- | |
2.1.4 |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.