Skip to content

Instantly share code, notes, and snippets.

@SaveTheRbtz
Created September 17, 2012 00:09
Show Gist options
  • Save SaveTheRbtz/3734928 to your computer and use it in GitHub Desktop.
Save SaveTheRbtz/3734928 to your computer and use it in GitHub Desktop.
tcp: Added knobs to tune initial window sizes
commit e4d08801d7cfb46a2f8cd46049c455f63d85a064
Author: Alexey Ivanov <rbtz@yandex-team.ru>
Date: Wed Jul 18 22:14:36 2012 +0400
YANDEX: tcp: Added knobs to tune initial window sizes
Added the following sysctls, each replacing a compile-time constant (constant -> sysctl):
/* TCP initial congestion window */
TCP_INIT_CWND -> net.ipv4.tcp_init_cwnd
/* Initial receive window offered, in units of mss (default: 10). */
TCP_DEFAULT_INIT_RCVWND -> net.ipv4.tcp_init_rcvwnd
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 60b7480..05f7607 100644
extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ea5acfb..b2d5c94 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -25,6 +25,7 @@
#include <net/ping.h>
static int zero;
+static int one = 1;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -688,6 +689,22 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_ms_jiffies,
},
{
+ .procname = "tcp_init_cwnd",
+ .data = &sysctl_tcp_init_cwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
+ .procname = "tcp_init_rcvwnd",
+ .data = &sysctl_tcp_init_rcvwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d2fa705..388ca95 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,10 @@ int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 1;
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
+int sysctl_tcp_init_cwnd __read_mostly = 10; /* TCP_INIT_CWND */
+EXPORT_SYMBOL(sysctl_tcp_init_cwnd);
+
int sysctl_tcp_stdurg __read_mostly;
int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
@@ -267,7 +271,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
{
int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
- sndmem *= TCP_INIT_CWND;
+ sndmem *= sysctl_tcp_init_cwnd;
if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}
@@ -347,14 +351,14 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
static void tcp_fixup_rcvbuf(struct sock *sk)
{
u32 mss = tcp_sk(sk)->advmss;
- u32 icwnd = TCP_DEFAULT_INIT_RCVWND;
+ u32 icwnd = sysctl_tcp_init_rcvwnd;
int rcvmem;
/* Limit to 10 segments if mss <= 1460,
* or 14600/mss segments, with a minimum of two segments.
*/
if (mss > 1460)
- icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ icwnd = max_t(u32, (1460 * sysctl_tcp_init_rcvwnd) / mss, 2);
rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER);
while (tcp_win_from_space(rcvmem) < mss)
@@ -833,7 +837,7 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
if (!cwnd)
- cwnd = TCP_INIT_CWND;
+ cwnd = sysctl_tcp_init_cwnd;
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index af66ca9..4be5dea 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1885,7 +1885,7 @@ static int tcp_v4_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = TCP_INIT_CWND;
+ tp->snd_cwnd = sysctl_tcp_init_cwnd;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63b9c69..5228478 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -488,7 +488,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- newtp->snd_cwnd = TCP_INIT_CWND;
+ newtp->snd_cwnd = sysctl_tcp_init_cwnd;
newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 524b585..340cbc8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -63,6 +63,9 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
+/* Offer an initial receive window of 10 mss. */
+int sysctl_tcp_init_rcvwnd __read_mostly = 10; /* TCP_DEFAULT_INIT_RCVWND */
+EXPORT_SYMBOL_GPL(sysctl_tcp_init_rcvwnd);
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
@@ -229,14 +232,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set initial window to a value enough for senders starting with
- * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+ * initial congestion window of sysctl_tcp_init_rcvwnd. Place
* a limit on the initial window when mss is larger than 1460.
*/
if (mss > (1 << *rcv_wscale)) {
- int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+ int init_cwnd = sysctl_tcp_init_rcvwnd;
if (mss > 1460)
init_cwnd =
- max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
+ max_t(u32, (1460 * sysctl_tcp_init_rcvwnd) / mss, 2);
/* when initializing use the value from init_rcv_wnd
* rather than the default from above
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment