Skip to content

Instantly share code, notes, and snippets.

@Zheaoli
Last active June 29, 2022 09:09
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zheaoli/108546daf349de358761f9f8b9664a53 to your computer and use it in GitHub Desktop.
Save Zheaoli/108546daf349de358761f9f8b9664a53 to your computer and use it in GitHub Desktop.
Connection in Use Case
import socket
import time
# Minimal client for the "address already in use" reproduction: connect to
# the local server, print its greeting, then keep the connection open.
host = "127.0.0.1"
port = 12345
# The context manager guarantees the socket is closed even if connect/recv
# raises or the sleep is interrupted.
with socket.socket() as s:
    s.connect((host, port))
    print(s.recv(1024).decode())
    # Deliberately hold the connection open without closing it; per the
    # accompanying write-up this is what leaves the server side in FIN_WAIT2.
    time.sleep(10000000)
import socket
import time
import os
def abc():
    """Serve exactly one client on 127.0.0.1:12345, then close the listener.

    Binds a fresh listening socket, accepts a single connection, prints the
    peer address, sends a short greeting and closes both sockets.

    NOTE: SO_REUSEADDR is intentionally left unset. This function is the
    server half of a reproduction of "address already in use" after an
    active close, so the next call is expected to be able to fail in bind().

    Raises:
        OSError: propagated from bind/listen/accept/send, e.g. when the
            address is still in use.
    """
    host = "127.0.0.1"
    port = 12345
    # Context managers close the sockets even when bind/accept/send raises.
    with socket.socket() as s:
        s.bind((host, port))
        s.listen(5)
        c, addr = s.accept()
        with c:
            print("连接地址:", addr)
            c.send("Manjusaka!".encode())
# Re-run the one-shot server forever, pausing one second between runs, so
# that every iteration has to bind the same address again.
while True:
    abc()
    time.sleep(1)
@Zheaoli
Copy link
Author

Zheaoli commented Apr 30, 2022

这个问题差不多是这样

服务端主动关闭 socket 之后,走了一系列调用

TIME(s)            FUNCTION
2.718627453        tcp_close
        tcp_close
        __sock_release
        sock_close
        __fput
        ____fput
        task_work_run
        exit_to_user_mode_prepare
        syscall_exit_to_user_mode
        do_syscall_64
        entry_SYSCALL_64_after_hwframe

调用栈参考上面的,在 tcp_close 后,主动给客户端发送一个 FIN 后进入 FIN_WAIT1 状态(实际上是先把内核的 socket 对象设置为 FIN_WAIT1 ,然后再调用 tcp_send_fin 发送 FIN),在收到 ACK 后进入 FIN_WAIT2 状态。由于客户端各种原因一直没有 close,导致 server 侧一直处于一个 FIN_WAIT2 的状态,进而一直占用一个 socket(但是这个 socket 关联的文件描述符已经提前释放)(等待 2MSL 后释放),导致 server 端重启后会出现 Bind 失败的问题。

额外写个脚本再验证下:我们 hook tcp_send_fin 和 tcp_fin 这两个函数。我们预期的结果是:有一次内核状态为 TCP_FIN_WAIT1 的 FIN 发送请求(位于 tcp_send_fin),而没有 TCP_FIN_WAIT2 状态下 tcp_fin 的处理(即收到 FIN 后的处理)。

from bcc import BPF
import struct
import socket

# eBPF program (C source compiled by BCC). It attaches to tcp_fin and
# tcp_send_fin and reports matching sockets through the "events" ring buffer.
bpf_text = """
#include <linux/ptrace.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <bcc/proto.h>
#include <linux/socket.h>

/* NOTE(review): per the BCC reference guide the second argument is a page
 * count, not a byte count; 65536 pages is a large buffer - confirm intent. */
BPF_RINGBUF_OUTPUT(events, 65536);

/* Values stored in event_data_t.type; user space treats 2 as "Send FIN"
 * and anything else as "Receive FIN". */
#define EVENT_RECV_FIN 1
#define EVENT_SEND_FIN 2

/* One record per reported FIN event. */
struct event_data_t {
    u16 state;  /* skc_state at the time of the probe */
    u32 saddr;  /* skc_rcv_saddr, copied as-is */
    u32 daddr;  /* skc_daddr, copied as-is */
    u16 lport;  /* skc_num */
    u16 dport;  /* skc_dport, copied as-is (user space applies ntohs) */
    u16 type;   /* EVENT_RECV_FIN or EVENT_SEND_FIN */
};

/* Reserve a ring-buffer record and fill in the socket fields.
 * Returns NULL when no space could be reserved. The caller sets ->type
 * and must submit the record. */
static struct event_data_t * generate_event_data(struct sock *sk) {
    struct event_data_t *event_data=events.ringbuf_reserve(sizeof(struct event_data_t));
    if (!event_data) {
        return NULL;
    }

    event_data->state = sk->__sk_common.skc_state;
    event_data->saddr = sk->__sk_common.skc_rcv_saddr;
    event_data->daddr = sk->__sk_common.skc_daddr;
    event_data->lport = sk->__sk_common.skc_num;
    event_data->dport = sk->__sk_common.skc_dport;
    return event_data;
}

/* kprobe on tcp_fin: report only sockets currently in FIN_WAIT2. */
int trace_tcp_fin(struct pt_regs *ctx, struct sock *sk) {
    u16 state = sk->__sk_common.skc_state;
    if (state != TCP_FIN_WAIT2) {
        return 0;
    }
    struct event_data_t *event_data=generate_event_data(sk);
    if (!event_data) {
        return 0;
    }
    event_data->type = EVENT_RECV_FIN;

    /* Second argument is a flags word, not a size; 0 = default wakeup. */
    events.ringbuf_submit(event_data, 0);
    return 0;
}

/* kprobe on tcp_send_fin: report only sockets currently in FIN_WAIT1. */
int trace_tcp_send_fin(struct pt_regs *ctx, struct sock *sk) {
    u16 state = sk->__sk_common.skc_state;
    if (state != TCP_FIN_WAIT1) {
        return 0;
    }
    struct event_data_t *event_data=generate_event_data(sk);
    if (!event_data) {
        return 0;
    }

    event_data->type = EVENT_SEND_FIN;

    /* Second argument is a flags word, not a size; 0 = default wakeup. */
    events.ringbuf_submit(event_data, 0);
    return 0;
}
"""

# Compile the embedded eBPF program, then attach each handler as a kprobe
# on the kernel function it traces.
bpf = BPF(text=bpf_text)

for kernel_fn, handler in (
    ("tcp_fin", "trace_tcp_fin"),
    ("tcp_send_fin", "trace_tcp_send_fin"),
):
    bpf.attach_kprobe(event=kernel_fn, fn_name=handler)


def parse_ip_address(data):
    """Return the dotted-quad text for a raw IPv4 address value.

    Args:
        data: The 32-bit address as an integer, i.e. the four address bytes
            (first octet first in memory) read as a native-endian unsigned
            int. This is how the saddr/daddr event fields arrive here.

    Returns:
        The address as a string such as "127.0.0.1".
    """
    # Re-serialise with the host's own byte order to recover the original
    # four bytes; unlike manual shifting this is correct on big-endian
    # hosts as well as little-endian ones.
    raw = struct.pack("=I", data & 0xFFFFFFFF)
    return socket.inet_ntoa(raw)


def process_event_data(cpu, data, size):
    """Ring-buffer callback: decode one event and print a one-line summary.

    Args:
        cpu: Unused; first positional argument supplied by the callback API.
        data: Raw event payload, decoded through the "events" table.
        size: Unused.
    """
    record = bpf["events"].event(data)

    # Event type 2 marks a FIN being sent; every other value is reported
    # as a received FIN.
    if record.type == 2:
        action = "Send FIN"
    else:
        action = "Receive FIN"

    fields = (
        f"Source Address:{parse_ip_address(record.saddr)}",
        f"Source Port: {record.lport}",
        f"Dest Address: {parse_ip_address(record.daddr)}",
        f"Dest Port: {socket.ntohs(record.dport)}",
        f"State: {record.state}",
        f"Action {action}",
    )
    print(", ".join(fields))

# Register the print callback on the "events" ring buffer, then process
# events until the user interrupts the script.
bpf["events"].open_ring_buffer(process_event_data)
try:
    while True:
        # ring_buffer_poll() waits for data before consuming, whereas
        # ring_buffer_consume() returns immediately and would make this
        # loop spin at full CPU.
        bpf.ring_buffer_poll()
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop; exit without a traceback and
    # without relying on the site-provided exit() helper.
    raise SystemExit from None

可以用上面的最小复现的例子试一下

@jschwinger233
Copy link

这个问题和客户端无关, 就算客户端在收到 fin 之后及时 close, 服务端也会残留 TIME_WAIT 导致 address in use.
你的最小复现代码里, 把 client.py 里的 sleep 删掉一样可以复现, 足以证明和客户端行为无关, 只是残留状态一个是 TIME_WAIT 一个是 FIN_WAIT2, 这是 TCP 的设计.
解决这个问题要用 SO_REUSEADDR 和 SO_REUSEPORT.

@Zheaoli
Copy link
Author

Zheaoli commented Jun 29, 2022

@jschwinger233 嗯,是的,最终的解法是 SO_REUSEADDR/SO_REUSEPORT

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment