Setting RSS with a symmetric Toeplitz hash function on i40e (X710/XL710/XXV710/X722) with DPDK rte_flow
// Please see https://github.com/lukashino/i40e-symmetric-rss-rte-flow
//
// Created by lukashino
//
#include <signal.h>
#include <unistd.h>
#include <rte_ethdev.h>
#include <rte_malloc.h>
#include <rte_hash.h>
#include <rte_ip.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_flow.h>
#define BURST_SIZE 32
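/* Flow Director configuration; copied into port_conf in device_init_port_conf(). */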
struct rte_fdir_conf fdir_conf = {
.mode = RTE_FDIR_MODE_NONE,
.pballoc = RTE_FDIR_PBALLOC_64K,
.status = RTE_FDIR_REPORT_STATUS,
.mask = {
.vlan_tci_mask = 0xFFEF,
.ipv4_mask = {
.src_ip = 0xFFFFFFFF,
.dst_ip = 0xFFFFFFFF,
},
.ipv6_mask = {
.src_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
.dst_ip = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
},
.src_port_mask = 0xFFFF,
.dst_port_mask = 0xFFFF,
.mac_addr_byte_mask = 0xFF,
.tunnel_type_mask = 1,
.tunnel_id_mask = 0xFFFFFFFF,
},
.drop_queue = 127,
};
#define MTU_SIZE 3000
#define NB_RX_DESC 8192
#define NB_TX_DESC 8192
#define MEMPOOL_SIZE 262143
#define MEMPOOL_CACHE_SIZE 500
#define MULTICAST_ENABLE 1
#define PROMISC_ENABLE 1
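/* i40e (X710/XL710/XXV710/X722) NICs use a 52-byte RSS hash key. */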
#define I40E_RSS_HKEY_LEN 52
static volatile int g_should_stop = 0;
static void handle_sig(int sig)
{
switch (sig) {
case SIGINT:
case SIGTERM:
g_should_stop = 1;
break;
}
}
static void handle_broken_pmd(int sig)
{
#define HINT "segfault, try --no-pci or start as root\n"
if (sig != SIGSEGV)
return;
write(STDERR_FILENO, HINT, sizeof(HINT) - 1);
signal(sig, SIG_DFL);
kill(getpid(), sig);
}
static int should_stop(void)
{
return g_should_stop;
}
struct context {
int socket_id;
uint16_t nb_queues;
const char *port1_name;
uint16_t port1_id;
uint16_t nb_rx_desc;
uint16_t nb_tx_desc;
uint32_t mempool_size;
uint32_t mempool_cache_size;
struct rte_mempool **port1_pkt_mempool_array; // TODO: rework this later into an array of arrays of arrays
rte_atomic16_t queue_id; // from zero to lcore count
};
static void thread_loop(uint16_t port_id, uint16_t queue_id)
{
int retval;
struct rte_mbuf *pkts[BURST_SIZE] = { NULL };
uint16_t rx_count;
uint64_t tot_rx_count = 0;
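/* Poll the RX queue until a stop signal arrives; count and free every received mbuf. */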
while (!should_stop()) {
rx_count = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SIZE);
if (rx_count > 0) {
tot_rx_count += rx_count;
printf("Received %d packet/s on lcore %d\n", rx_count, rte_lcore_id());
for (int i = 0; i < rx_count; i++)
rte_pktmbuf_free(pkts[i]);
}
}
printf("Lcoreid %d total rx count: %lu\n", rte_lcore_id(), tot_rx_count);
}
static int thread_main(void *arg)
{
struct context *ctx = arg;
int qid = rte_atomic16_add_return(&ctx->queue_id, 1);
thread_loop(ctx->port1_id, qid);
printf("Thread %d finished\n", rte_lcore_id());
return 0;
}
static int device_init_port_conf(const struct context *ctx, const struct rte_eth_dev_info *dev_info,
struct rte_eth_conf *port_conf)
{
*port_conf = (struct rte_eth_conf){
.rxmode = {
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
.offloads = DEV_RX_OFFLOAD_CHECKSUM,
},
.txmode = {
},
};
// offload checksum validation
port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_CHECKSUM;
port_conf->fdir_conf = fdir_conf;
return 0;
}
int device_configure_queues(struct context *ctx, const struct rte_eth_dev_info *dev_info,
const struct rte_eth_conf *port_conf)
{
int retval;
struct rte_eth_rxconf rxq_conf;
struct rte_eth_txconf txq_conf;
ctx->port1_pkt_mempool_array =
rte_calloc("mempool_array", ctx->nb_queues, sizeof(struct rte_mempool *), 0);
if (unlikely(ctx->port1_pkt_mempool_array == NULL)) {
fprintf(stderr, "Could not allocate memory for packet mempool pointers\n");
return -ENOMEM;
}
for (uint16_t queue_id = 0; queue_id < ctx->nb_queues; queue_id++) {
char mempool_name[64];
snprintf(mempool_name, 64, "pktmbuf_pool_p%d_q%d", ctx->port1_id, queue_id);
printf("Creating a packet mbuf pool %s of size %d, cache size %d, mbuf size %d\n",
mempool_name, ctx->mempool_size, ctx->mempool_cache_size,
RTE_MBUF_DEFAULT_BUF_SIZE);
ctx->port1_pkt_mempool_array[queue_id] = rte_pktmbuf_pool_create(mempool_name, ctx->mempool_size,
ctx->mempool_cache_size, 0, RTE_MBUF_DEFAULT_BUF_SIZE, (int)rte_socket_id());
if (ctx->port1_pkt_mempool_array[queue_id] == NULL) {
retval = -rte_errno;
return retval;
}
rxq_conf = dev_info->default_rxconf;
rxq_conf.offloads = port_conf->rxmode.offloads;
rxq_conf.rx_thresh.hthresh = 0;
rxq_conf.rx_thresh.pthresh = 0;
rxq_conf.rx_thresh.wthresh = 0;
rxq_conf.rx_free_thresh = 0;
rxq_conf.rx_drop_en = 0;
printf("Creating Q %d of P %d using desc RX: %d TX: %d RX htresh: %d RX pthresh %d wtresh "
"%d free_tresh %d drop_en %d Offloads %lu\n",
queue_id, ctx->port1_id, ctx->nb_rx_desc, ctx->nb_tx_desc,
rxq_conf.rx_thresh.hthresh, rxq_conf.rx_thresh.pthresh, rxq_conf.rx_thresh.wthresh,
rxq_conf.rx_free_thresh, rxq_conf.rx_drop_en, rxq_conf.offloads);
retval = rte_eth_rx_queue_setup(ctx->port1_id, queue_id, ctx->nb_rx_desc, ctx->socket_id,
&rxq_conf, ctx->port1_pkt_mempool_array[queue_id]);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) during initialization of device queue %u of port %u\n",
retval, queue_id, ctx->port1_id);
return retval;
}
}
for (uint16_t queue_id = 0; queue_id < ctx->nb_queues; queue_id++) {
txq_conf = dev_info->default_txconf;
txq_conf.offloads = port_conf->txmode.offloads;
retval = rte_eth_tx_queue_setup(
ctx->port1_id, queue_id, ctx->nb_tx_desc, ctx->socket_id, &txq_conf);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) during initialization of device queue %u of port %u\n",
retval, queue_id, ctx->port1_id);
return retval;
}
}
return 0;
}
static int device_configure(struct context *ctx)
{
// configure device
int retval;
struct rte_eth_dev_info dev_info;
struct rte_eth_conf port_conf;
retval = rte_eth_dev_get_port_by_name(ctx->port1_name, &(ctx->port1_id));
if (retval < 0) {
fprintf(stderr, "Error (err=%d) when getting port id of %s Is device enabled?\n", retval,
ctx->port1_name);
return retval;
}
if (!rte_eth_dev_is_valid_port(ctx->port1_id)) {
fprintf(stderr, "Specified port %d is invalid\n", ctx->port1_id);
return -ENODEV;
}
retval = rte_eth_dev_socket_id(ctx->port1_id);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) invalid socket id (port %u)\n", retval, ctx->port1_id);
return retval;
} else {
ctx->socket_id = retval;
}
retval = rte_eth_dev_info_get(ctx->port1_id, &dev_info);
if (retval != 0) {
fprintf(stderr, "Error (err=%d) during getting device info (port %u)\n", retval,
ctx->port1_id);
return retval;
}
retval = device_init_port_conf(ctx, &dev_info, &port_conf);
if (retval != 0) {
fprintf(stderr, "Error (err=%d) during port init (port %u)\n", retval,
ctx->port1_id);
return retval;
}
retval = rte_eth_dev_configure(ctx->port1_id, ctx->nb_queues, ctx->nb_queues, &port_conf);
if (retval != 0) {
fprintf(stderr, "Error (err=%d) during configuring the device (port %u)\n", retval,
ctx->port1_id);
return retval;
}
retval = rte_eth_dev_adjust_nb_rx_tx_desc(ctx->port1_id, &ctx->nb_rx_desc, &ctx->nb_tx_desc);
if (retval != 0) {
fprintf(stderr, "Error (err=%d) during adjustment of device queues descriptors (port %u)\n",
retval, ctx->port1_id);
return retval;
}
retval = MULTICAST_ENABLE ? rte_eth_allmulticast_enable(ctx->port1_id)
: rte_eth_allmulticast_disable(ctx->port1_id);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) when en/disabling multicast on port %s\n", retval,
ctx->port1_name);
return retval;
}
retval = PROMISC_ENABLE ? rte_eth_promiscuous_enable(ctx->port1_id)
: rte_eth_promiscuous_disable(ctx->port1_id);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) when enabling promiscuous mode on port %u\n", retval,
ctx->port1_id);
rte_eal_cleanup();
return EXIT_FAILURE;
}
retval = rte_eth_dev_set_mtu(ctx->port1_id, MTU_SIZE);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) when setting MTU to %u on port %u\n", retval, MTU_SIZE,
ctx->port1_id);
return retval;
}
retval = device_configure_queues(ctx, &dev_info, &port_conf);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) when configuring queues on port %u\n", retval, ctx->port1_id);
return retval;
}
return 0;
}
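/*
 * RSS configuration via rte_flow:
 * - Rule 1 defines the RSS queue region (all queues, default hash function,
 *   no hash types - the queue region cannot be combined with hash types).
 * - Rules 2-4 switch the hash function to symmetric Toeplitz for
 *   non-fragmented IPv4-other, IPv4-TCP and IPv4-UDP traffic, reusing the
 *   hash key read back from the NIC.
 */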
static void device_rss_configure(int portid, int nb_queues) {
int retval;
struct rte_flow_item pattern[] = { { 0 }, { 0 }, { 0 }, { 0 } };
struct rte_flow_action action[] = { { 0 }, { 0 } };
struct rte_flow_action_rss rss_action_conf = { 0 };
struct rte_flow_attr attr = { 0 };
struct rte_flow_error create_error = { 0 };
struct rte_flow_error valid_error = { 0 };
struct rte_flow_error flush_error = { 0 };
uint16_t queues[RTE_MAX_QUEUES_PER_PORT];
uint8_t rss_hkey[I40E_RSS_HKEY_LEN];
struct rte_eth_rss_conf rss_conf = {
.rss_key = rss_hkey,
.rss_key_len = I40E_RSS_HKEY_LEN,
};
struct rte_flow *flow;
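/* Read back the RSS key currently programmed in the NIC so the flow rules reuse it. */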
retval = rte_eth_dev_rss_hash_conf_get(portid, &rss_conf);
if (retval != 0) {
fprintf(stderr, "FATALERROR!!! HASH CONF NO SUCCESS\n");
exit(1);
}
for (uint16_t i = 0; i < nb_queues; ++i) {
queues[i] = i;
}
rss_action_conf.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
rss_action_conf.level = 0;
rss_action_conf.types = 0; // the queue region cannot be combined with hash types
rss_action_conf.key = rss_conf.rss_key;
rss_action_conf.key_len = rss_conf.rss_key_len;
rss_action_conf.queue_num = nb_queues;
rss_action_conf.queue = queues;
attr.ingress = 1;
pattern[0].type = RTE_FLOW_ITEM_TYPE_END;
action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
action[0].conf = &rss_action_conf;
action[1].type = RTE_FLOW_ACTION_TYPE_END;
flow = rte_flow_create(portid, &attr, pattern, action, &create_error);
if (flow == NULL) {
fprintf(stderr, "ERROR Create errror: %s\n", create_error.message);
int ret = rte_flow_validate(portid, &attr, pattern, action, &valid_error);
fprintf(stderr, "FATALERROR!!! Err on flow validation: %s \n errmsg: %s\n",
rte_strerror(-ret), valid_error.message);
exit(1);
} else {
printf("RULE1 created\n");
}
memset(pattern, 0, sizeof(pattern));
memset(action, 0, sizeof(action));
memset(&rss_action_conf, 0, sizeof(rss_action_conf));
pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
rss_action_conf.func = RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
rss_action_conf.level = 0;
rss_action_conf.types = ETH_RSS_NONFRAG_IPV4_OTHER;
rss_action_conf.key_len = rss_conf.rss_key_len;
rss_action_conf.key = rss_conf.rss_key;
rss_action_conf.queue_num = 0;
rss_action_conf.queue = NULL;
action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
action[0].conf = &rss_action_conf;
action[1].type = RTE_FLOW_ACTION_TYPE_END;
flow = rte_flow_create(portid, &attr, pattern, action, &create_error);
if (flow == NULL) {
fprintf(stderr, "ERROR Create errror: %s\n", create_error.message);
int ret = rte_flow_validate(portid, &attr, pattern, action, &valid_error);
fprintf(stderr, "FATALERROR!!! Err on flow validation: %s \n errmsg: %s\n",
rte_strerror(-ret), valid_error.message);
exit(1);
} else {
printf("RULE2 created\n");
}
memset(pattern, 0, sizeof(pattern));
memset(action, 0, sizeof(action));
memset(&rss_action_conf, 0, sizeof(rss_action_conf));
pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP;
pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
rss_action_conf.func = RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
rss_action_conf.level = 0;
rss_action_conf.types = ETH_RSS_NONFRAG_IPV4_TCP;
rss_action_conf.key_len = rss_conf.rss_key_len;
rss_action_conf.key = rss_conf.rss_key;
rss_action_conf.queue_num = 0;
rss_action_conf.queue = NULL;
action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
action[0].conf = &rss_action_conf;
action[1].type = RTE_FLOW_ACTION_TYPE_END;
flow = rte_flow_create(portid, &attr, pattern, action, &create_error);
if (flow == NULL) {
fprintf(stderr, "ERROR Create errror: %s\n", create_error.message);
int ret = rte_flow_validate(portid, &attr, pattern, action, &valid_error);
fprintf(stderr, "FATALERROR!!! Err on flow validation: %s \n errmsg: %s\n",
rte_strerror(-ret), valid_error.message);
exit(1);
} else {
printf("RULE3 created\n");
}
memset(pattern, 0, sizeof(pattern));
memset(action, 0, sizeof(action));
memset(&rss_action_conf, 0, sizeof(rss_action_conf));
pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
pattern[2].type = RTE_FLOW_ITEM_TYPE_UDP;
pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
rss_action_conf.func = RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
rss_action_conf.level = 0;
rss_action_conf.types = ETH_RSS_NONFRAG_IPV4_UDP;
rss_action_conf.key_len = rss_conf.rss_key_len;
rss_action_conf.key = rss_conf.rss_key;
rss_action_conf.queue_num = 0;
rss_action_conf.queue = NULL;
action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
action[0].conf = &rss_action_conf;
action[1].type = RTE_FLOW_ACTION_TYPE_END;
flow = rte_flow_create(portid, &attr, pattern, action, &create_error);
if (flow == NULL) {
fprintf(stderr, "ERROR Create errror: %s\n", create_error.message);
int ret = rte_flow_validate(portid, &attr, pattern, action, &valid_error);
fprintf(stderr, "FATALERROR!!! Err on flow validation: %s \n errmsg: %s\n",
rte_strerror(-ret), valid_error.message);
exit(1);
} else {
printf("RULE4 created\n");
}
}
int main(int argc, char *argv[])
{
struct context ctx;
char *iface1 = NULL;
char *iface2 = NULL;
int proc_type;
int args;
int retval;
rte_log_set_global_level(RTE_LOG_WARNING);
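/* Catch segfaults from broken PMDs during rte_eal_init() and print a hint; reset to the default handler afterwards. */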
signal(SIGSEGV, &handle_broken_pmd);
args = rte_eal_init(argc, argv);
if (args < 0) {
fprintf(stderr, "rte_eal_init() has failed: %d\n", args);
return EXIT_FAILURE;
}
signal(SIGSEGV, SIG_DFL);
argc -= args;
argv += args;
if (argc <= 1) {
fprintf(stderr, "Use: %s PCIe_address#1 [PCIe_address#2]\n", argv[0]);
fprintf(stderr, "The second NIC then enables the IPS mode\n");
rte_eal_cleanup();
return EXIT_FAILURE;
}
iface1 = argv[1];
ctx.port1_name = iface1;
ctx.socket_id = (int)rte_socket_id();
ctx.nb_queues = rte_lcore_count();
ctx.nb_rx_desc = NB_RX_DESC;
ctx.nb_tx_desc = NB_TX_DESC;
ctx.mempool_size = MEMPOOL_SIZE;
ctx.mempool_cache_size = MEMPOOL_CACHE_SIZE;
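/* queue_id starts at -1 so the first rte_atomic16_add_return() in a worker returns queue 0. */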
rte_atomic16_init(&ctx.queue_id);
rte_atomic16_set(&ctx.queue_id, -1);
retval = device_configure(&ctx);
if (retval < 0) {
fprintf(stderr, "device_configure(): unable to configure the device, %d\n", retval);
rte_eal_cleanup();
return EXIT_FAILURE;
}
proc_type = rte_eal_process_type();
if (proc_type != RTE_PROC_PRIMARY) {
fprintf(stderr, "invalid process type %d, primary required\n", proc_type);
rte_eal_cleanup();
return EXIT_FAILURE;
}
retval = rte_eth_dev_start(ctx.port1_id);
if (retval < 0) {
fprintf(stderr, "Error (err=%d) during device startup of port %u", retval, ctx.port1_id);
rte_eal_cleanup();
return EXIT_FAILURE;
}
// Configure the RSS via RTE_FLOW
device_rss_configure(ctx.port1_id, ctx.nb_queues);
printf("\nStarting all cores ... [Ctrl+C to quit]\n\n\n");
signal(SIGINT, &handle_sig);
signal(SIGTERM, &handle_sig);
rte_eal_mp_remote_launch(thread_main, &ctx, CALL_MASTER);
rte_eal_mp_wait_lcore();
struct rte_flow_error flush_error = { 0 };
retval = rte_flow_flush(ctx.port1_id, &flush_error);
if (retval != 0) {
fprintf(stderr, "FATALERROR!!! Err on flow flush2: %s \n errmsg: %s\n",
rte_strerror(-retval), flush_error.message);
}
rte_eal_cleanup();
return EXIT_SUCCESS;
}
@lukashino (Author)

Tested on DPDK 19.11

@ramkumarpvs

@lukashino, did you get a chance to check DPDK 21.11 or above with the ICE driver and E800-series cards? If it is working, can you share a sample?

@lukashino (Author)

To my knowledge, the ICE driver has a more ordinary way to set up RSS through the port_conf structure. Here is a snippet from the Suricata codebase; this function configures the port_conf structure:
https://github.com/OISF/suricata/blob/master/src/runmode-dpdk.c#L1134

and calls the driver-specific function to set the proper RSS hash functions:
https://github.com/OISF/suricata/blob/master/src/util-dpdk-ice.c#L40
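
For reference, a minimal sketch of what such a port_conf-based RSS setup can look like with the pre-22.11 DPDK API; the rss_hf selection below is my own assumption, not necessarily what Suricata configures:

    /* needs rte_ethdev.h */
    struct rte_eth_conf port_conf = {
        .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
        .rx_adv_conf = {
            .rss_conf = {
                .rss_key = NULL,    /* NULL lets the PMD use its default key */
                .rss_key_len = 0,
                .rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
            },
        },
    };
    /* rss_hf should be limited to dev_info.flow_type_rss_offloads of the port
     * before calling rte_eth_dev_configure(). */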

@lukashino (Author)

Also, the Suricata file util-dpdk-i40e.c can help with further RSS setup on i40e.
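
As a related generic example (not necessarily what util-dpdk-i40e.c does), the RSS key and hash types can also be adjusted on an already configured port; a minimal sketch with a placeholder key and an assumed port_id variable:

    /* needs rte_ethdev.h, string.h, stdio.h */
    uint8_t hkey[52];                   /* 52-byte key size for i40e */
    memset(hkey, 0x6d, sizeof(hkey));   /* placeholder key, not a recommended value */
    struct rte_eth_rss_conf rss_conf = {
        .rss_key = hkey,
        .rss_key_len = sizeof(hkey),
        .rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
    };
    int ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
    if (ret != 0)
        fprintf(stderr, "rss_hash_update failed: %s\n", rte_strerror(-ret));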

@ramkumarpvs commented Mar 20, 2024

@lukashino interesting. So now we don't need to set the RSS rules individually? Trying to understand that part. We were using the RSS rules configuration on i40e and they are not working for ice.
It looks like i40e has its own special way of dealing with RSS.

@lukashino (Author) commented Mar 20, 2024 via email

@ramkumarpvs

Are you using a static key or getting the key from the card?

@ramkumarpvs commented Mar 25, 2024

@lukashino, I started using the default key from the ICE driver on an E810 card with DPDK 21.11 and the default RSS offloads. I see that the RSS config is accepted and the default offloads are used.
