Created
August 24, 2023 20:30
-
-
Save pavel-odintsov/711d1392707a6829439420e458c6436d to your computer and use it in GitHub Desktop.
reuseport_bpf.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
 * a BPF program (both classic and extended) to read the first word from an
 * incoming packet (expected to be in network byte-order), calculate a modulus
 * of that number, and then dispatch the packet to the Nth socket using the
 * result. These tests are run for each supported address family and protocol.
 * Additionally, a few edge cases in the implementation are tested.
 *
 * NOTE: this copy of the file only attaches a classic BPF (cBPF) program, and
 * main() only runs UDP (SOCK_DGRAM) cases.
 */
#include <errno.h> | |
#include <error.h> | |
#include <fcntl.h> | |
#include <linux/bpf.h> | |
#include <linux/filter.h> | |
#include <linux/unistd.h> | |
#include <netinet/in.h> | |
#include <netinet/tcp.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/epoll.h> | |
#include <sys/types.h> | |
#include <sys/socket.h> | |
#include <sys/resource.h> | |
#include <unistd.h> | |
#include "../kselftest.h" | |
/* Parameters describing one receive-group test case. */
struct test_params {
	/* Address family of the receiving sockets (socket() and bind()). */
	int recv_family;
	/* Address family of the sending socket. */
	int send_family;
	/*
	 * Passed as the socket *type* argument of socket(), e.g. SOCK_DGRAM;
	 * SOCK_STREAM additionally enables the listen/accept paths.
	 */
	int protocol;
	/* Number of sockets in the receive group. */
	size_t recv_socks;
	/* Port (host byte order) every receiving socket binds to. */
	uint16_t recv_port;
	/* First source port; packet number N is sent from send_port_min + N. */
	uint16_t send_port_min;
};
/*
 * Length handed to bind()/sendto() for every address in this file: the size
 * of a full sockaddr_storage, which is what the address helpers allocate.
 */
static size_t sockaddr_size(void)
{
	const size_t storage_len = sizeof(struct sockaddr_storage);

	return storage_len;
}
/*
 * Allocate a zeroed sockaddr_storage and fill it in as the wildcard ("any")
 * address of the requested family.
 *
 * family: AF_INET or AF_INET6; any other value ends the test via error().
 * port:   port number in host byte order (stored in network byte order).
 *
 * Returns a heap-allocated address which the caller must free(). Never
 * returns NULL: an allocation failure also ends the test via error().
 */
static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
{
	struct sockaddr_storage *addr;
	struct sockaddr_in *addr4;
	struct sockaddr_in6 *addr6;

	/* calloc() hands back zeroed storage; check it before writing. */
	addr = calloc(1, sizeof(*addr));
	if (!addr)
		error(1, errno, "failed to allocate sockaddr");

	switch (family) {
	case AF_INET:
		addr4 = (struct sockaddr_in *)addr;
		addr4->sin_family = AF_INET;
		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
		addr4->sin_port = htons(port);
		break;
	case AF_INET6:
		addr6 = (struct sockaddr_in6 *)addr;
		addr6->sin6_family = AF_INET6;
		addr6->sin6_addr = in6addr_any;
		addr6->sin6_port = htons(port);
		break;
	default:
		error(1, 0, "Unsupported family %d", family);
	}
	return (struct sockaddr *)addr;
}
/*
 * Build the loopback address of the given family and port: start from the
 * wildcard address produced by new_any_sockaddr() and overwrite only the
 * address field. The caller owns the result and must free() it.
 */
static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
{
	struct sockaddr * const sa = new_any_sockaddr(family, port);

	if (family == AF_INET) {
		struct sockaddr_in *in4 = (struct sockaddr_in *)sa;

		in4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;

		in6->sin6_addr = in6addr_loopback;
	} else {
		error(1, 0, "Unsupported family %d", family);
	}
	return sa;
}
static void attach_cbpf(int fd, uint16_t mod) | |
{ | |
struct sock_filter code[] = { | |
/* A = (uint32_t)skb[0] */ | |
{ BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 }, | |
/* A = A % mod */ | |
{ BPF_ALU | BPF_MOD, 0, 0, mod }, | |
/* return A */ | |
{ BPF_RET | BPF_A, 0, 0, 0 }, | |
}; | |
struct sock_fprog p = { | |
.len = ARRAY_SIZE(code), | |
.filter = code, | |
}; | |
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p))) | |
error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF"); | |
} | |
static void build_recv_group(const struct test_params p, int fd[], uint16_t mod, | |
void (*attach_bpf)(int, uint16_t)) | |
{ | |
struct sockaddr * const addr = | |
new_any_sockaddr(p.recv_family, p.recv_port); | |
int i, opt; | |
for (i = 0; i < p.recv_socks; ++i) { | |
fd[i] = socket(p.recv_family, p.protocol, 0); | |
if (fd[i] < 0) | |
error(1, errno, "failed to create recv %d", i); | |
opt = 1; | |
if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, | |
sizeof(opt))) | |
error(1, errno, "failed to set SO_REUSEPORT on %d", i); | |
if (i == 0) | |
attach_bpf(fd[i], mod); | |
if (bind(fd[i], addr, sockaddr_size())) | |
error(1, errno, "failed to bind recv socket %d", i); | |
if (p.protocol == SOCK_STREAM) { | |
opt = 4; | |
if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt, | |
sizeof(opt))) | |
error(1, errno, | |
"failed to set TCP_FASTOPEN on %d", i); | |
if (listen(fd[i], p.recv_socks * 10)) | |
error(1, errno, "failed to listen on socket"); | |
} | |
} | |
free(addr); | |
} | |
static void send_from(struct test_params p, uint16_t sport, char *buf, | |
size_t len) | |
{ | |
struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport); | |
struct sockaddr * const daddr = | |
new_loopback_sockaddr(p.send_family, p.recv_port); | |
const int fd = socket(p.send_family, p.protocol, 0), one = 1; | |
if (fd < 0) | |
error(1, errno, "failed to create send socket"); | |
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) | |
error(1, errno, "failed to set reuseaddr"); | |
if (bind(fd, saddr, sockaddr_size())) | |
error(1, errno, "failed to bind send socket"); | |
if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0) | |
error(1, errno, "failed to send message"); | |
close(fd); | |
free(saddr); | |
free(daddr); | |
} | |
/*
 * Send 2 * p.recv_socks packets and verify which socket of the receive group
 * each one is delivered to.
 *
 * Packet number `data` (0, 1, 2, ...) is sent from source port
 * p.send_port_min + data and carries `data` as a 32-bit word in network byte
 * order. After each send the function waits on an epoll set containing all
 * of fd[0..p.recv_socks-1], reads the message (accepting a connection first
 * for SOCK_STREAM) and checks that
 *   - exactly 4 bytes arrived, and
 *   - the index of the readable socket in fd[] equals data % mod.
 *
 * p:   test case parameters.
 * fd:  the receive group, p.recv_socks entries.
 * mod: modulus currently programmed into the attached filter.
 *
 * Any failure or mismatch ends the test via error().
 */
static void test_recv_order(const struct test_params p, int fd[], int mod)
{
	char recv_buf[8], send_buf[8];
	struct msghdr msg;
	struct iovec recv_io = { recv_buf, 8 };
	struct epoll_event ev;
	int epfd, conn, i, sport, expected;
	uint32_t data, ndata;

	epfd = epoll_create(1);
	if (epfd < 0)
		error(1, errno, "failed to create epoll");
	for (i = 0; i < p.recv_socks; ++i) {
		ev.events = EPOLLIN;
		ev.data.fd = fd[i];
		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
			error(1, errno, "failed to register sock %d epoll", i);
	}

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &recv_io;
	msg.msg_iovlen = 1;

	for (data = 0; data < p.recv_socks * 2; ++data) {
		sport = p.send_port_min + data;
		ndata = htonl(data);
		memcpy(send_buf, &ndata, sizeof(ndata));
		send_from(p, sport, send_buf, sizeof(ndata));

		i = epoll_wait(epfd, &ev, 1, -1);
		if (i < 0)
			error(1, errno, "epoll wait failed");

		if (p.protocol == SOCK_STREAM) {
			conn = accept(ev.data.fd, NULL, NULL);
			if (conn < 0)
				error(1, errno, "error accepting");
			i = recvmsg(conn, &msg, 0);
			close(conn);
		} else {
			i = recvmsg(ev.data.fd, &msg, 0);
		}
		if (i < 0)
			error(1, errno, "recvmsg error");
		if (i != sizeof(ndata))
			error(1, 0, "expected size %zu got %d",
			      sizeof(ndata), i);

		/* Map the readable descriptor back to its index in fd[]. */
		for (i = 0; i < p.recv_socks; ++i)
			if (ev.data.fd == fd[i])
				break;
		memcpy(&ndata, recv_buf, sizeof(ndata));
		fprintf(stderr, "Socket %d: %u\n", i, (unsigned int)ntohl(ndata));

		/*
		 * The filter dispatches on the first word of the payload,
		 * so the expectation must be derived from the payload
		 * value and not from the source port.
		 */
		expected = (int)(data % (uint32_t)mod);
		if (i != expected)
			error(1, 0, "expected socket %d", expected);
	}

	/* Do not leak one epoll descriptor per invocation. */
	close(epfd);
}
static void test_reuseport_cbpf(struct test_params p) | |
{ | |
int i, fd[p.recv_socks]; | |
fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks); | |
build_recv_group(p, fd, p.recv_socks, attach_cbpf); | |
test_recv_order(p, fd, p.recv_socks); | |
p.send_port_min += p.recv_socks * 2; | |
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2); | |
attach_cbpf(fd[0], p.recv_socks / 2); | |
test_recv_order(p, fd, p.recv_socks / 2); | |
for (i = 0; i < p.recv_socks; ++i) | |
close(fd[i]); | |
} | |
/* RLIMIT_MEMLOCK as found at startup; only meaningful if rlim_old_valid. */
static struct rlimit rlim_old;
/* Non-zero once getrlimit() has successfully filled in rlim_old. */
static int rlim_old_valid;

/*
 * Return lim + extra without wrapping around: an unlimited value stays
 * unlimited, and a sum that does not fit in rlim_t becomes RLIM_INFINITY.
 */
static rlim_t rlim_bump(rlim_t lim, rlim_t extra)
{
	if (lim == RLIM_INFINITY)
		return RLIM_INFINITY;
	if (extra > (rlim_t)-1 - lim)
		return RLIM_INFINITY;
	return lim + extra;
}

/*
 * Runs before main(): remember the current RLIMIT_MEMLOCK and, unless the
 * soft limit is already unlimited, try to raise both limits by 1 MiB.
 * Raising is best effort; a failing setrlimit() is deliberately ignored.
 */
static __attribute__((constructor)) void main_ctor(void)
{
	struct rlimit rlim_new;

	/* Without a valid snapshot there is nothing safe to restore later. */
	if (getrlimit(RLIMIT_MEMLOCK, &rlim_old))
		return;
	rlim_old_valid = 1;

	if (rlim_old.rlim_cur == RLIM_INFINITY)
		return;

	/*
	 * Saturating add: a plain "+ 1 MiB" would wrap an unlimited hard
	 * limit to a small value below the new soft limit.
	 */
	rlim_new.rlim_cur = rlim_bump(rlim_old.rlim_cur, 1UL << 20);
	rlim_new.rlim_max = rlim_bump(rlim_old.rlim_max, 1UL << 20);
	(void)setrlimit(RLIMIT_MEMLOCK, &rlim_new);
}

/*
 * Runs after main(): put RLIMIT_MEMLOCK back to the value saved by
 * main_ctor(), but only if that value was actually read.
 */
static __attribute__((destructor)) void main_dtor(void)
{
	if (rlim_old_valid)
		(void)setrlimit(RLIMIT_MEMLOCK, &rlim_old);
}
int main(void) | |
{ | |
fprintf(stderr, "---- IPv4 UDP ----\n"); | |
/* NOTE: UDP socket lookups traverse a different code path when there | |
* are > 10 sockets in a group. Run the bpf test through both paths. | |
*/ | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET, | |
.send_family = AF_INET, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 10, | |
.recv_port = 8001, | |
.send_port_min = 9020}); | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET, | |
.send_family = AF_INET, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 20, | |
.recv_port = 8001, | |
.send_port_min = 9020}); | |
fprintf(stderr, "---- IPv6 UDP ----\n"); | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET6, | |
.send_family = AF_INET6, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 10, | |
.recv_port = 8004, | |
.send_port_min = 9060}); | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET6, | |
.send_family = AF_INET6, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 20, | |
.recv_port = 8004, | |
.send_port_min = 9060}); | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET6, | |
.send_family = AF_INET, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 10, | |
.recv_port = 8007, | |
.send_port_min = 9100}); | |
test_reuseport_cbpf((struct test_params) { | |
.recv_family = AF_INET6, | |
.send_family = AF_INET, | |
.protocol = SOCK_DGRAM, | |
.recv_socks = 20, | |
.recv_port = 8007, | |
.send_port_min = 9100}); | |
fprintf(stderr, "SUCCESS\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment