Last active
April 5, 2024 03:51
-
-
Save love4taylor/111d56cd2b1dc149cba6d80f617f47b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 99c24dd6eb2b39e883431874ccae6eaccc735cea Mon Sep 17 00:00:00 2001 | |
From: love4taylor <i@love4taylor.com> | |
Date: Thu, 30 Nov 2023 22:51:13 +0900 | |
Subject: [PATCH 1/2] net-tcp_brutal: make it as a built-in kernel module | |
--- | |
net/ipv4/Kconfig | 9 ++ | |
net/ipv4/Makefile | 1 + | |
net/ipv4/tcp_brutal.c | 311 ++++++++++++++++++++++++++++++++++++++++++ | |
3 files changed, 321 insertions(+) | |
create mode 100644 net/ipv4/tcp_brutal.c | |
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig | |
index 8e94ed7c56a0..c58e7de8bfe3 100644 | |
--- a/net/ipv4/Kconfig | |
+++ b/net/ipv4/Kconfig | |
@@ -678,6 +678,15 @@ config TCP_CONG_BBR | |
AQM schemes that do not provide a delay signal. It requires the fq | |
("Fair Queue") pacing packet scheduler. | |
+config TCP_CONG_BRUTAL | |
+ tristate "TCP Brutal" | |
+ default n | |
+ help | |
+ | |
+ TCP Brutal is Hysteria's congestion control algorithm ported to TCP. | |
+ Say Y to build it into the kernel or M to build it as a module. Information | |
+ about Brutal itself can be found in the Hysteria documentation. | |
+ | |
choice | |
prompt "Default TCP congestion control" | |
default DEFAULT_CUBIC | |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile | |
index ec36d2ec059e..0647ec2ff635 100644 | |
--- a/net/ipv4/Makefile | |
+++ b/net/ipv4/Makefile | |
@@ -45,6 +45,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o | |
obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o | |
obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o | |
obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o | |
+obj-$(CONFIG_TCP_CONG_BRUTAL) += tcp_brutal.o | |
obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o | |
obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o | |
obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o | |
diff --git a/net/ipv4/tcp_brutal.c b/net/ipv4/tcp_brutal.c | |
new file mode 100644 | |
index 000000000000..98eebf1b6f2c | |
--- /dev/null | |
+++ b/net/ipv4/tcp_brutal.c | |
@@ -0,0 +1,311 @@ | |
+#include <linux/module.h> | |
+#include <linux/version.h> | |
+#include <net/tcp.h> | |
+ | |
+#if IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) | |
+#include <net/transp_v6.h> | |
+#else | |
+#warning IPv6 support is disabled. Brutal will only work with IPv4. \ | |
+ Please ensure you have enabled CONFIG_IPV6 in your kernel config \ | |
+ and your kernel version is 5.8 or later. | |
+#endif | |
+ | |
+// Values installed by brutal_init() until TCP_BRUTAL_PARAMS overrides them. | |
+// Rates are in bytes per second; cwnd gain is in tenths (20 = 2.0). | |
+#define INIT_PACING_RATE 125000 // 1 Mbps | |
+#define INIT_CWND_GAIN 20 | |
+ | |
+// Limits: the rate and gain bounds are enforced in brutal_set_params(), | |
+// MIN_CWND is the floor applied in brutal_update_rate(). | |
+#define MIN_PACING_RATE 62500 // 500 Kbps | |
+#define MIN_CWND_GAIN 5 | |
+#define MAX_CWND_GAIN 80 | |
+#define MIN_CWND 4 | |
+ | |
+#ifndef ICSK_CA_PRIV_SIZE | |
+#error "ICSK_CA_PRIV_SIZE not defined" | |
+#else | |
+// This is the size of the private data area in struct inet_connection_sock | |
+// The size varies between Linux versions | |
+// We use it to calculate the number of slots in the packet info array: | |
+// whatever fits after subtracting 2 * sizeof(u64) (presumably the rate and | |
+// the padded cwnd_gain members of struct brutal), clamped to the range 3..5. | |
+#define RAW_PKT_INFO_SLOTS ((ICSK_CA_PRIV_SIZE - 2 * sizeof(u64)) / sizeof(struct brutal_pkt_info)) | |
+#define PKT_INFO_SLOTS (RAW_PKT_INFO_SLOTS < 3 ? 3 : (RAW_PKT_INFO_SLOTS > 5 ? 5 : RAW_PKT_INFO_SLOTS)) | |
+#endif | |
+ | |
+// Below this many acked + lost packets in the sampling window, | |
+// brutal_update_rate() assumes an ack rate of 100%. | |
+#define MIN_PKT_INFO_SAMPLES 50 | |
+// Floor applied to the measured ack rate, in percent. | |
+#define MIN_ACK_RATE_PERCENT 80 | |
+ | |
+// setsockopt() option name (level IPPROTO_TCP) that carries a struct brutal_params. | |
+#define TCP_BRUTAL_PARAMS 23301 | |
+ | |
+// tcp_sock_get_sec() returns the socket's current TCP timestamp in whole | |
+// seconds. The timestamp source depends on the kernel version: | |
+// tp->tcp_mstamp (microseconds) on 4.12 and later, the jiffies-based | |
+// tcp_time_stamp before that. | |
+// NOTE(review): none of the three variants is declared static, so the symbol | |
+// has external linkage - confirm whether that is intended. | |
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) | |
+u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
+{ | |
+ return tp->tcp_mstamp / USEC_PER_SEC; | |
+} | |
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) | |
+// see https://github.com/torvalds/linux/commit/9a568de4818dea9a05af141046bd3e589245ab83 | |
+u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
+{ | |
+ return tp->tcp_mstamp.stamp_us / USEC_PER_SEC; | |
+} | |
+#else | |
+#include <linux/jiffies.h> | |
+u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
+{ | |
+ return jiffies_to_usecs(tcp_time_stamp) / USEC_PER_SEC; | |
+} | |
+#endif | |
+ | |
+// Per-second accumulator of delivery feedback, filled in by brutal_main(). | |
+struct brutal_pkt_info | |
+{ | |
+ // Second (as returned by tcp_sock_get_sec()) this slot currently covers | |
+ u64 sec; | |
+ // Sum of rate_sample.acked_sacked recorded during that second | |
+ u32 acked; | |
+ // Sum of rate_sample.losses recorded during that second | |
+ u32 losses; | |
+}; | |
+ | |
+// Per-socket Brutal state, kept in the congestion-control private area | |
+// returned by inet_csk_ca(). brutal_register() checks at build time that it | |
+// fits into ICSK_CA_PRIV_SIZE. | |
+struct brutal | |
+{ | |
+ // Target send rate; set from brutal_params.rate (bytes per second) | |
+ u64 rate; | |
+ // cwnd multiplier in tenths; set from brutal_params.cwnd_gain | |
+ u32 cwnd_gain; | |
+ | |
+ // Ring of per-second counters, indexed by sec % PKT_INFO_SLOTS | |
+ struct brutal_pkt_info slots[PKT_INFO_SLOTS]; | |
+}; | |
+ | |
+// Payload of the TCP_BRUTAL_PARAMS socket option, copied from the caller by | |
+// brutal_set_params(). Declared __packed, so the two members occupy 12 bytes | |
+// with no trailing padding. | |
+struct brutal_params | |
+{ | |
+ u64 rate; // Send rate in bytes per second | |
+ u32 cwnd_gain; // CWND gain in tenths (10=1.0) | |
+} __packed; | |
+ | |
+// Copies of tcp_prot / tcpv6_prot whose setsockopt hook is replaced by the | |
+// Brutal wrappers. Both are populated once in brutal_register(); the IPv6 | |
+// copy only exists when <net/transp_v6.h> was included. | |
+static struct proto tcp_prot_override __ro_after_init; | |
+#ifdef _TRANSP_V6_H | |
+static struct proto tcpv6_prot_override __ro_after_init; | |
+#endif // _TRANSP_V6_H | |
+ | |
+// Forward declaration: the ops table is defined further down, but | |
+// brutal_set_params() needs its address to recognise Brutal sockets. | |
+static struct tcp_congestion_ops tcp_brutal_ops; | |
+ | |
+// Handles the TCP_BRUTAL_PARAMS socket option. | |
+// | |
+// @sk:     socket the option is being set on | |
+// @optval: caller buffer holding a struct brutal_params | |
+// @optlen: size of that buffer; must be at least sizeof(struct brutal_params) | |
+// | |
+// Returns 0 on success, -EINVAL for a short buffer or out-of-range values, | |
+// -EFAULT if the buffer cannot be copied, and -ENOPROTOOPT if the socket is | |
+// not (or no longer) using Brutal as its congestion control. | |
+#ifdef _LINUX_SOCKPTR_H | |
+static int brutal_set_params(struct sock *sk, sockptr_t optval, unsigned int optlen) | |
+#else | |
+static int brutal_set_params(struct sock *sk, char __user *optval, unsigned int optlen) | |
+#endif | |
+{ | |
+	struct brutal_params params; | |
+	struct brutal *brutal; | |
+	int err = 0; | |
+ | |
+	if (optlen < sizeof(params)) | |
+		return -EINVAL; | |
+ | |
+#ifdef _LINUX_SOCKPTR_H | |
+	if (copy_from_sockptr(&params, optval, sizeof(params))) | |
+		return -EFAULT; | |
+#else | |
+	if (copy_from_user(&params, optval, sizeof(params))) | |
+		return -EFAULT; | |
+#endif | |
+ | |
+	// Sanity checks | |
+	if (params.rate < MIN_PACING_RATE) | |
+		return -EINVAL; | |
+	if (params.cwnd_gain < MIN_CWND_GAIN || params.cwnd_gain > MAX_CWND_GAIN) | |
+		return -EINVAL; | |
+ | |
+	// The protocol setsockopt hook is entered without the socket lock, while | |
+	// the congestion-control callbacks read this state from the receive | |
+	// path. Take the lock so the two fields are updated atomically with | |
+	// respect to ACK processing. | |
+	lock_sock(sk); | |
+ | |
+	// The sk_prot override installed by brutal_init() can outlive a switch | |
+	// to another congestion control. Writing through inet_csk_ca() in that | |
+	// case would scribble over the other algorithm's private state. | |
+	if (inet_csk(sk)->icsk_ca_ops != &tcp_brutal_ops) { | |
+		err = -ENOPROTOOPT; | |
+		goto out; | |
+	} | |
+ | |
+	brutal = inet_csk_ca(sk); | |
+	brutal->rate = params.rate; | |
+	brutal->cwnd_gain = params.cwnd_gain; | |
+ | |
+out: | |
+	release_sock(sk); | |
+	return err; | |
+} | |
+ | |
+// IPv4 setsockopt hook installed in tcp_prot_override. | |
+// TCP_BRUTAL_PARAMS at level IPPROTO_TCP is handled by brutal_set_params(); | |
+// every other option is passed through to the stock tcp_prot handler. | |
+#ifdef _LINUX_SOCKPTR_H | |
+static int brutal_tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) | |
+#else | |
+static int brutal_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) | |
+#endif | |
+{ | |
+	const bool is_brutal_opt = (level == IPPROTO_TCP) && (optname == TCP_BRUTAL_PARAMS); | |
+ | |
+	if (!is_brutal_opt) | |
+		return tcp_prot.setsockopt(sk, level, optname, optval, optlen); | |
+ | |
+	return brutal_set_params(sk, optval, optlen); | |
+} | |
+ | |
+#ifdef _TRANSP_V6_H | |
+// IPv6 setsockopt hook installed in tcpv6_prot_override. | |
+// TCP_BRUTAL_PARAMS at level IPPROTO_TCP is handled by brutal_set_params(); | |
+// every other option is passed through to the stock tcpv6_prot handler. | |
+#ifdef _LINUX_SOCKPTR_H | |
+static int brutal_tcpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) | |
+#else // _LINUX_SOCKPTR_H | |
+static int brutal_tcpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) | |
+#endif // _LINUX_SOCKPTR_H | |
+{ | |
+	const bool is_brutal_opt = (level == IPPROTO_TCP) && (optname == TCP_BRUTAL_PARAMS); | |
+ | |
+	if (!is_brutal_opt) | |
+		return tcpv6_prot.setsockopt(sk, level, optname, optval, optlen); | |
+ | |
+	return brutal_set_params(sk, optval, optlen); | |
+} | |
+#endif // _TRANSP_V6_H | |
+ | |
+// Congestion-control .init hook: prepares a socket for Brutal. | |
+// | |
+// - Points sk->sk_prot at the matching override so that TCP_BRUTAL_PARAMS | |
+//   reaches brutal_set_params(). | |
+// - Sets snd_ssthresh to TCP_INFINITE_SSTHRESH. | |
+// - Seeds the private state with INIT_PACING_RATE / INIT_CWND_GAIN and | |
+//   clears the per-second sample slots. | |
+// - On 4.13 and later, requests internal pacing if no pacing mode is set. | |
+static void brutal_init(struct sock *sk) | |
+{ | |
+	struct tcp_sock *tp = tcp_sk(sk); | |
+	struct brutal *brutal = inet_csk_ca(sk); | |
+ | |
+	switch (sk->sk_family) { | |
+	case AF_INET: | |
+		WRITE_ONCE(sk->sk_prot, &tcp_prot_override); | |
+		break; | |
+#ifdef _TRANSP_V6_H | |
+	case AF_INET6: | |
+		WRITE_ONCE(sk->sk_prot, &tcpv6_prot_override); | |
+		break; | |
+#endif // _TRANSP_V6_H | |
+	default: | |
+		// Any user may select this algorithm (TCP_CONG_NON_RESTRICTED), and | |
+		// without the IPv6 override an AF_INET6 socket ends up here. Taking | |
+		// the whole machine down with BUG() is not warranted: leave sk_prot | |
+		// untouched, so the socket runs with the initial parameters and | |
+		// TCP_BRUTAL_PARAMS is simply not available on it. | |
+		pr_warn_once("brutal: unsupported socket family %u, TCP_BRUTAL_PARAMS unavailable\n", | |
+			     sk->sk_family); | |
+		break; | |
+	} | |
+ | |
+	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | |
+ | |
+	brutal->rate = INIT_PACING_RATE; | |
+	brutal->cwnd_gain = INIT_CWND_GAIN; | |
+ | |
+	memset(brutal->slots, 0, sizeof(brutal->slots)); | |
+ | |
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) | |
+	// Pacing is REQUIRED for Brutal to work, but Linux only has internal pacing after 4.13. | |
+	// For kernels prior to 4.13, you MUST add fq pacing manually (e.g. "tc qdisc add dev eth0 root fq pacing") | |
+	// or rate control will be broken. | |
+	// See https://github.com/torvalds/linux/commit/218af599fa635b107cfe10acf3249c4dfe5e4123 for details. | |
+	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); | |
+#endif | |
+} | |
+ | |
+// Local copy of the tcp.h accessor, kept for compatibility reasons. | |
+// Returns the socket's current congestion window (tp->snd_cwnd). | |
+static inline u32 brutal_tcp_snd_cwnd(const struct tcp_sock *tp) | |
+{ | |
+	const u32 cur_cwnd = tp->snd_cwnd; | |
+ | |
+	return cur_cwnd; | |
+} | |
+ | |
+// Copied from tcp.h for compatibility reasons | |
+// Stores val as the socket's congestion window. Warns once if val is zero | |
+// or has its top bit set (i.e. is not positive when viewed as an int); the | |
+// value is stored either way. | |
+static inline void brutal_tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) | |
+{ | |
+ WARN_ON_ONCE((int)val <= 0); | |
+ tp->snd_cwnd = val; | |
+} | |
+ | |
+// Recomputes the congestion window and pacing rate of a Brutal socket. | |
+// | |
+// The configured rate is scaled up by 100 / ack_rate, where ack_rate is the | |
+// percentage of acked packets among acked + lost packets in the recent | |
+// per-second slots. cwnd is then derived from that rate, the smoothed RTT, | |
+// the MSS and the cwnd gain, floored at MIN_CWND and capped by | |
+// tp->snd_cwnd_clamp. The pacing rate is capped by sk->sk_max_pacing_rate. | |
+// | |
+// NOTE(review): cwnd is a u32, so the quotient rate / MSEC_PER_SEC is | |
+// truncated to 32 bits and the following multiplication by rtt_ms can wrap | |
+// for very large rate * RTT products; acked * 100 is likewise a 32-bit | |
+// product. Confirm these ranges are acceptable. | |
+// NOTE(review): sec - PKT_INFO_SLOTS wraps if sec < PKT_INFO_SLOTS, in which | |
+// case no slot matches and ack_rate falls back to 100 - confirm. | |
+static void brutal_update_rate(struct sock *sk) | |
+{ | |
+ struct tcp_sock *tp = tcp_sk(sk); | |
+ struct brutal *brutal = inet_csk_ca(sk); | |
+ | |
+ u64 sec = tcp_sock_get_sec(tp); | |
+ // Oldest second whose slot still counts towards the ack rate | |
+ u64 min_sec = sec - PKT_INFO_SLOTS; | |
+ u32 acked = 0, losses = 0; | |
+ u32 ack_rate; // Scaled by 100 (100=1.00) as kernel doesn't support float | |
+ u64 rate = brutal->rate; | |
+ u32 cwnd; | |
+ | |
+ u32 mss = tp->mss_cache; | |
+ // srtt_us is shifted right by 3 before the conversion to milliseconds | |
+ // (presumably because the kernel stores it scaled by 8 - confirm). | |
+ // A result of 0 is bumped to 1 ms, since a zero RTT would zero the cwnd | |
+ // product computed below. | |
+ u32 rtt_ms = (tp->srtt_us >> 3) / USEC_PER_MSEC; | |
+ if (!rtt_ms) | |
+ rtt_ms = 1; | |
+ | |
+ // Sum the counters of every slot that is recent enough | |
+ for (int i = 0; i < PKT_INFO_SLOTS; i++) | |
+ { | |
+ if (brutal->slots[i].sec >= min_sec) | |
+ { | |
+ acked += brutal->slots[i].acked; | |
+ losses += brutal->slots[i].losses; | |
+ } | |
+ } | |
+ // With too few samples assume no loss; otherwise compute the percentage | |
+ // of acked packets and never let it drop below MIN_ACK_RATE_PERCENT | |
+ if (acked + losses < MIN_PKT_INFO_SAMPLES) | |
+ ack_rate = 100; | |
+ else | |
+ { | |
+ ack_rate = acked * 100 / (acked + losses); | |
+ if (ack_rate < MIN_ACK_RATE_PERCENT) | |
+ ack_rate = MIN_ACK_RATE_PERCENT; | |
+ } | |
+ | |
+ rate *= 100; | |
+ rate /= ack_rate; | |
+ | |
+ // The order here is chosen carefully to avoid overflow as much as possible | |
+ cwnd = rate / MSEC_PER_SEC; | |
+ cwnd *= rtt_ms; | |
+ cwnd /= mss; | |
+ cwnd *= brutal->cwnd_gain; | |
+ // cwnd_gain is expressed in tenths, hence the division by 10 | |
+ cwnd /= 10; | |
+ cwnd = max_t(u32, cwnd, MIN_CWND); | |
+ | |
+ // Never exceed the socket's cwnd clamp | |
+ brutal_tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); | |
+ | |
+ // Publish the loss-compensated rate, capped by sk_max_pacing_rate | |
+ WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate))); | |
+} | |
+ | |
+// Congestion-control .cong_control hook. | |
+// Adds the acked and lost packet counts of the rate sample to the slot of | |
+// the current second, then recomputes cwnd and pacing rate through | |
+// brutal_update_rate(). | |
+// | |
+// @sk: socket being processed | |
+// @rs: rate sample for this call; its delivered, interval_us, acked_sacked | |
+//      and losses fields are read | |
+static void brutal_main(struct sock *sk, const struct rate_sample *rs) | |
+{ | |
+ struct tcp_sock *tp = tcp_sk(sk); | |
+ struct brutal *brutal = inet_csk_ca(sk); | |
+ | |
+ u64 sec; | |
+ u32 slot; | |
+ | |
+ // Ignore invalid rate samples | |
+ if (rs->delivered < 0 || rs->interval_us <= 0) | |
+ return; | |
+ | |
+ sec = tcp_sock_get_sec(tp); | |
+ slot = sec % PKT_INFO_SLOTS; | |
+ | |
+ if (brutal->slots[slot].sec == sec) | |
+ { | |
+ // Current slot, update | |
+ brutal->slots[slot].acked += rs->acked_sacked; | |
+ brutal->slots[slot].losses += rs->losses; | |
+ } | |
+ else | |
+ { | |
+ // Uninitialized slot or slot expired | |
+ // Re-label it with the current second and restart both counters | |
+ brutal->slots[slot].sec = sec; | |
+ brutal->slots[slot].acked = rs->acked_sacked; | |
+ brutal->slots[slot].losses = rs->losses; | |
+ } | |
+ | |
+ brutal_update_rate(sk); | |
+} | |
+ | |
+// Congestion-control .undo_cwnd hook. | |
+// Reports the congestion window the socket currently has, unchanged. | |
+static u32 brutal_undo_cwnd(struct sock *sk) | |
+{ | |
+	const struct tcp_sock *tp = tcp_sk(sk); | |
+ | |
+	return brutal_tcp_snd_cwnd(tp); | |
+} | |
+ | |
+// Congestion-control .ssthresh hook. | |
+// Reports the slow-start threshold the socket currently has, unchanged. | |
+static u32 brutal_ssthresh(struct sock *sk) | |
+{ | |
+	const struct tcp_sock *tp = tcp_sk(sk); | |
+ | |
+	return tp->snd_ssthresh; | |
+} | |
+ | |
+// Congestion-control .release hook: undoes the sk_prot override installed by | |
+// brutal_init() when the socket stops using Brutal (another congestion | |
+// control is selected, or the socket is torn down). | |
+// | |
+// Without this the socket keeps pointing at the override for the rest of its | |
+// life: TCP_BRUTAL_PARAMS would still be accepted while another algorithm | |
+// owns the private area, and for a modular build the pointer would dangle | |
+// once the module is unloaded. | |
+static void brutal_release(struct sock *sk) | |
+{ | |
+	// Only restore a pointer that is ours; anything else was installed by | |
+	// someone else and must be left alone. | |
+	if (sk->sk_prot == &tcp_prot_override) | |
+		WRITE_ONCE(sk->sk_prot, &tcp_prot); | |
+#ifdef _TRANSP_V6_H | |
+	else if (sk->sk_prot == &tcpv6_prot_override) | |
+		WRITE_ONCE(sk->sk_prot, &tcpv6_prot); | |
+#endif // _TRANSP_V6_H | |
+} | |
+ | |
+// Congestion-control operations registered under the name "brutal". | |
+// TCP_CONG_NON_RESTRICTED is set in .flags; cwnd and pacing are driven | |
+// entirely from the .cong_control hook. | |
+static struct tcp_congestion_ops tcp_brutal_ops = { | |
+	.flags = TCP_CONG_NON_RESTRICTED, | |
+	.name = "brutal", | |
+	.owner = THIS_MODULE, | |
+	.init = brutal_init, | |
+	.release = brutal_release, | |
+	.cong_control = brutal_main, | |
+	.undo_cwnd = brutal_undo_cwnd, | |
+	.ssthresh = brutal_ssthresh, | |
+}; | |
+ | |
+// Module init: builds the sk_prot overrides and registers the algorithm. | |
+// Returns whatever tcp_register_congestion_control() returns. | |
+static int __init brutal_register(void) | |
+{ | |
+	int ret; | |
+ | |
+	// Build-time layout checks: at least one sample slot, and the private | |
+	// state must fit into the congestion-control area of the socket. | |
+	BUILD_BUG_ON(PKT_INFO_SLOTS < 1); | |
+	BUILD_BUG_ON(sizeof(struct brutal) > ICSK_CA_PRIV_SIZE); | |
+ | |
+#ifdef _TRANSP_V6_H | |
+	// Clone the stock IPv6 proto and swap in our setsockopt wrapper | |
+	memcpy(&tcpv6_prot_override, &tcpv6_prot, sizeof(tcpv6_prot_override)); | |
+	tcpv6_prot_override.setsockopt = brutal_tcpv6_setsockopt; | |
+#endif // _TRANSP_V6_H | |
+ | |
+	// Same for IPv4 | |
+	memcpy(&tcp_prot_override, &tcp_prot, sizeof(tcp_prot_override)); | |
+	tcp_prot_override.setsockopt = brutal_tcp_setsockopt; | |
+ | |
+	ret = tcp_register_congestion_control(&tcp_brutal_ops); | |
+ | |
+	return ret; | |
+} | |
+ | |
+// Module exit: removes "brutal" from the list of registered congestion | |
+// control algorithms. | |
+static void __exit brutal_unregister(void) | |
+{ | |
+ tcp_unregister_congestion_control(&tcp_brutal_ops); | |
+} | |
+ | |
+module_init(brutal_register); | |
+module_exit(brutal_unregister); | |
+ | |
+MODULE_AUTHOR("Aperture Internet Laboratory"); | |
+MODULE_LICENSE("GPL"); | |
+MODULE_DESCRIPTION("TCP Brutal"); | |
+MODULE_VERSION("1.0.1"); | |
-- | |
2.39.2 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From fc36b7922c3dedccfcfa6f8be4db24d8325b9518 Mon Sep 17 00:00:00 2001 | |
From: love4taylor <i@love4taylor.com> | |
Date: Fri, 5 Apr 2024 12:46:25 +0900 | |
Subject: [PATCH 2/2] net-tcp_brutal: use div_u64() to let it build on 32-bit | |
--- | |
net/ipv4/tcp_brutal.c | 15 ++++++++------- | |
1 file changed, 8 insertions(+), 7 deletions(-) | |
diff --git a/net/ipv4/tcp_brutal.c b/net/ipv4/tcp_brutal.c | |
index 98eebf1b6f2c..499981841d99 100644 | |
--- a/net/ipv4/tcp_brutal.c | |
+++ b/net/ipv4/tcp_brutal.c | |
@@ -1,6 +1,7 @@ | |
#include <linux/module.h> | |
#include <linux/version.h> | |
#include <net/tcp.h> | |
+#include <linux/math64.h> | |
#if IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 8, 0) | |
#include <net/transp_v6.h> | |
@@ -36,19 +37,19 @@ | |
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0) | |
u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
{ | |
- return tp->tcp_mstamp / USEC_PER_SEC; | |
+ return div_u64(tp->tcp_mstamp, USEC_PER_SEC); | |
} | |
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 12, 0) | |
// see https://github.com/torvalds/linux/commit/9a568de4818dea9a05af141046bd3e589245ab83 | |
u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
{ | |
- return tp->tcp_mstamp.stamp_us / USEC_PER_SEC; | |
+ return div_u64(tp->tcp_mstamp.stamp_us, USEC_PER_SEC); | |
} | |
#else | |
#include <linux/jiffies.h> | |
u64 tcp_sock_get_sec(const struct tcp_sock *tp) | |
{ | |
- return jiffies_to_usecs(tcp_time_stamp) / USEC_PER_SEC; | |
+ return div_u64(jiffies_to_usecs(tcp_time_stamp), USEC_PER_SEC); | |
} | |
#endif | |
@@ -214,10 +215,10 @@ static void brutal_update_rate(struct sock *sk) | |
} | |
rate *= 100; | |
- rate /= ack_rate; | |
+ rate = div_u64(rate, ack_rate); | |
// The order here is chosen carefully to avoid overflow as much as possible | |
- cwnd = rate / MSEC_PER_SEC; | |
+ cwnd = div_u64(rate, MSEC_PER_SEC); | |
cwnd *= rtt_ms; | |
cwnd /= mss; | |
cwnd *= brutal->cwnd_gain; | |
@@ -242,7 +243,7 @@ static void brutal_main(struct sock *sk, const struct rate_sample *rs) | |
return; | |
sec = tcp_sock_get_sec(tp); | |
- slot = sec % PKT_INFO_SLOTS; | |
+ div_u64_rem(sec, PKT_INFO_SLOTS, &slot); | |
if (brutal->slots[slot].sec == sec) | |
{ | |
@@ -308,4 +309,4 @@ module_exit(brutal_unregister); | |
MODULE_AUTHOR("Aperture Internet Laboratory"); | |
MODULE_LICENSE("GPL"); | |
MODULE_DESCRIPTION("TCP Brutal"); | |
-MODULE_VERSION("1.0.1"); | |
+MODULE_VERSION("1.0.2"); | |
-- | |
2.39.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment