Skip to content

Instantly share code, notes, and snippets.

@stevenschlansker
Created September 24, 2015 23:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stevenschlansker/6ad46c5ccb22bc4f3473 to your computer and use it in GitHub Desktop.
Save stevenschlansker/6ad46c5ccb22bc4f3473 to your computer and use it in GitHub Desktop.
Reproduction case for glibc netlink hang
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328743, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\231\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\231\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\231\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 71
bind(71, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(71, {sa_family=AF_NETLINK, pid=-1328749, groups=00000000}, [12]) = 0
sendto(71, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\223\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\223\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\223\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(71) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328759, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\211\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\211\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\211\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328771, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V}\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V}\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V}\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328781, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 0 (Timeout)
gettid() = 9370
write(2, "[9370] glibc: check_pf: netlink "..., 52) = 52
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
tgkill(8599, 9370, SIGABRT) = 0
--- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=8599, si_uid=0} ---
+++ killed by SIGABRT (core dumped) +++
/* Determine protocol families for which interfaces exist. Linux version.
Copyright (C) 2003-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <ifaddrs.h>
#include <netdb.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <alloca.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/socket.h>
#include <poll.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <bits/libc-lock.h>
#ifndef IFA_F_HOMEADDRESS
# define IFA_F_HOMEADDRESS 0
#endif
#ifndef IFA_F_OPTIMISTIC
# define IFA_F_OPTIMISTIC 0
#endif
#ifndef TEMP_FAILURE_RETRY
/* Fallback used when the C library headers did not already provide
   TEMP_FAILURE_RETRY.  EXPRESSION is evaluated and converted to long int;
   it is evaluated again for as long as the result is -1 while errno is
   EINTR.  The macro yields the final result.  Relies on the GNU
   statement-expression extension.  */
# define TEMP_FAILURE_RETRY(expression)                                   \
  ({                                                                      \
    long int __result;                                                    \
    for (;;)                                                              \
      {                                                                   \
        __result = (long int) (expression);                               \
        if (__result != -1L || errno != EINTR)                            \
          break;                                                          \
      }                                                                   \
    __result;                                                             \
  })
#endif
/* Record made of a flag byte, a prefix length, an index and four 32-bit
   address words.
   NOTE(review): the name suggests this describes one IPv6 interface
   address (4 x 32 bits = 128 bits) -- confirm.  No field of this struct is
   read or written in the code visible here.  */
struct in6addrinfo
{
/* Flag values; stored in an 8-bit bit-field.  */
enum {
in6ai_deprecated = 1,
in6ai_homeaddress = 2
} flags:8;
/* Prefix length, one byte.  */
uint8_t prefixlen;
/* 16 unnamed bits: explicit padding in front of the 32-bit members.  */
uint16_t :16;
/* NOTE(review): presumably the interface index -- confirm.  */
uint32_t index;
/* Address stored as four 32-bit words.  */
uint32_t addr[4];
};
/* Return the kernel thread ID of the calling thread, obtained through the
   raw SYS_gettid system call.  The value is used to tag diagnostic output
   with the thread that produced it.

   Declared with a real (void) prototype so that calls passing arguments
   are rejected at compile time.  */
long gettid(void) {
  return syscall(SYS_gettid);
}
/* Send an RTM_GETADDR dump request on the netlink socket FD and read the
   replies until a message of type NLMSG_DONE has been seen.

   Each read is preceded by a poll() with a one second timeout.  If poll()
   fails, times out, or reports the descriptor without POLLIN, a diagnostic
   is written to stderr and the process is aborted: a missing reply is the
   condition this reproducer exists to detect.

   The function returns silently when sendto() or recvmsg() fails or when a
   reply was truncated (MSG_TRUNC).  Replies of other types, including
   NLMSG_ERROR, are skipped without special handling.

   PID is accepted for the caller's benefit but is not used.  */
static void
make_request (int fd, pid_t pid)
{
  struct req
  {
    struct nlmsghdr nlh;
    struct rtgenmsg g;
    /* struct rtgenmsg consists of a single byte.  This means there
       are three bytes of padding included in the REQ definition.
       We make them explicit here.  */
    char pad[3];
  } req;
  struct sockaddr_nl nladdr;

  (void) pid;

  req.nlh.nlmsg_len = sizeof (req);
  req.nlh.nlmsg_type = RTM_GETADDR;
  req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
  req.nlh.nlmsg_pid = 0;
  req.nlh.nlmsg_seq = time (NULL);
  req.g.rtgen_family = AF_UNSPEC;

  assert (sizeof (req) - offsetof (struct req, pad) == 3);
  memset (req.pad, '\0', sizeof (req.pad));

  memset (&nladdr, '\0', sizeof (nladdr));
  nladdr.nl_family = AF_NETLINK;

  /* One page is used as the receive buffer.  sysconf() is the public
     interface for the page size; fall back to 4096 if it reports an
     error.  */
  long page_size = sysconf (_SC_PAGESIZE);
  const size_t buf_size = page_size > 0 ? (size_t) page_size : 4096;
  char *buf = alloca (buf_size);
  struct iovec iov = { .iov_base = buf, .iov_len = buf_size };

  if (TEMP_FAILURE_RETRY (sendto (fd, (void *) &req, sizeof (req), 0,
                                  (struct sockaddr *) &nladdr,
                                  sizeof (nladdr))) < 0)
    return;

  int done = 0;
  do
    {
      /* Designated initialisers keep this independent of the order (and
         any padding members) of struct msghdr in the C library.  */
      struct msghdr msg =
        {
          .msg_name = (void *) &nladdr,
          .msg_namelen = sizeof (nladdr),
          .msg_iov = &iov,
          .msg_iovlen = 1,
          .msg_control = NULL,
          .msg_controllen = 0,
          .msg_flags = 0
        };

      struct pollfd pfd;
      pfd.fd = fd;
      pfd.events = POLLIN;
      pfd.revents = 0;

      int pollresult = poll (&pfd, 1, 1000);
      if (pollresult < 0)
        {
          perror ("glibc: check_pf: poll");
          abort ();
        }
      /* The parentheses matter: `==' binds tighter than `&', so without
         them the POLLIN test would always be false.  */
      if (pollresult == 0 || (pfd.revents & POLLIN) == 0)
        {
          fprintf (stderr,
                   "[%ld] glibc: check_pf: netlink socket read timeout\n",
                   gettid ());
          abort ();
        }

      ssize_t read_len = TEMP_FAILURE_RETRY (recvmsg (fd, &msg, 0));
      if (read_len < 0)
        return;

      if (msg.msg_flags & MSG_TRUNC)
        return;

      /* Walk every netlink message contained in this datagram.
         NLMSG_NEXT decrements read_len as it advances.  */
      struct nlmsghdr *nlmh;
      for (nlmh = (struct nlmsghdr *) buf;
           NLMSG_OK (nlmh, (size_t) read_len);
           nlmh = (struct nlmsghdr *) NLMSG_NEXT (nlmh, read_len))
        {
          if (nlmh->nlmsg_type == NLMSG_DONE)
            /* We found the end, leave the loop.  */
            done = 1;
        }
    }
  while (! done);
}
/* NOTE(review): STACK_SIZE is not referenced anywhere in the code visible
   here -- confirm whether it can be dropped.  */
#define STACK_SIZE (1024 * 1024)
/* Number of worker threads that main() creates and joins.  */
#define NTHREADS 800
/* Forward declarations of the functions defined after main().  */
void go();
void* repeat(void*);
/* Start NTHREADS threads that each run repeat(), then wait for all of them.

   Only threads that were actually created are joined: if pthread_create()
   fails, the error is reported on stderr, no further threads are started,
   and the exit status is EXIT_FAILURE.  Otherwise the exit status is 0.

   ARGC and ARGV are not used.  */
int main (int argc, char** argv)
{
  pthread_t threads[NTHREADS];
  int started;
  int status = 0;

  (void) argc;
  (void) argv;

  for (started = 0; started < NTHREADS; started++) {
    int err = pthread_create(&threads[started], NULL, repeat, NULL);
    if (err != 0) {
      /* pthread_create() returns the error number instead of setting errno;
         threads[started] is left unusable and must not be joined.  */
      fprintf(stderr, "pthread_create (thread %d): %s\n",
              started, strerror(err));
      status = EXIT_FAILURE;
      break;
    }
  }

  for (int i = 0; i < started; i++) {
    int err = pthread_join(threads[i], NULL);
    if (err != 0) {
      fprintf(stderr, "pthread_join (thread %d): %s\n", i, strerror(err));
      status = EXIT_FAILURE;
    }
  }

  return status;
}
/* Thread start routine: call go() 10000 times, then print a success marker
   tagged with this thread's ID to stdout and flush it.  The argument is not
   used and the return value is always NULL.  */
void* repeat(void* ignored) {
  int remaining = 10000;

  while (remaining > 0) {
    go();
    remaining--;
  }

  long tid = gettid();
  printf("[%ld] exit success ", tid);
  fflush(stdout);
  return NULL;
}
/* Run one round of the reproducer: open a NETLINK_ROUTE socket, bind it
   with a zeroed AF_NETLINK address, read back the address assigned to it
   and, if both steps succeeded, pass the socket and that address's nl_pid
   to make_request().  The socket is closed before returning.  Failures of
   socket(), bind() and getsockname() are silently ignored.  */
void go() {
  int sock = socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock < 0)
    return;

  struct sockaddr_nl local;
  socklen_t local_len = sizeof (local);

  memset (&local, '\0', sizeof (local));
  local.nl_family = AF_NETLINK;

  /* getsockname() is only attempted once bind() has succeeded.  */
  int ready = bind (sock, (struct sockaddr *) &local, sizeof (local)) == 0;
  if (ready)
    ready = getsockname (sock, (struct sockaddr *) &local, &local_len) == 0;

  if (ready)
    make_request (sock, local.nl_pid);

  close (sock);
}
@J-cztery
Copy link

@stevenschlansker I would like to thank you for this piece of code and your investigation! Good work!

@mhbvr
Copy link

mhbvr commented Sep 28, 2016

Thank you for the investigation and the reproducer.

@morj
Copy link

morj commented May 9, 2017

@stevenschlansker thanks for the testing code! How do I determine the kernel version containing the fix for this problem?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment