Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Reproduction case for glibc netlink hang
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328743, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\231\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\231\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\231\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 71
bind(71, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(71, {sa_family=AF_NETLINK, pid=-1328749, groups=00000000}, [12]) = 0
sendto(71, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\223\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\223\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}])
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\223\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(71) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328759, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\211\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\211\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\211\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328771, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V}\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V}\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}])
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V}\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20
close(78) = 0
socket(PF_NETLINK, SOCK_RAW, 0) = 78
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0
getsockname(78, {sa_family=AF_NETLINK, pid=-1328781, groups=00000000}, [12]) = 0
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20
poll([{fd=78, events=POLLIN}], 1, 1000) = 0 (Timeout)
gettid() = 9370
write(2, "[9370] glibc: check_pf: netlink "..., 52) = 52
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0
tgkill(8599, 9370, SIGABRT) = 0
--- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=8599, si_uid=0} ---
+++ killed by SIGABRT (core dumped) +++
/* Determine protocol families for which interfaces exist. Linux version.
Copyright (C) 2003-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <ifaddrs.h>
#include <netdb.h>
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <alloca.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/socket.h>
#include <poll.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <bits/libc-lock.h>
/* Fallback definitions for two interface-address flags.  Each is defined
   as 0 only when the headers included above did not already provide it
   (presumably kernel headers that predate these flags -- confirm).
   Neither macro is referenced elsewhere in the visible code.  */
#ifndef IFA_F_HOMEADDRESS
# define IFA_F_HOMEADDRESS 0
#endif
#ifndef IFA_F_OPTIMISTIC
# define IFA_F_OPTIMISTIC 0
#endif
/* Local definition of TEMP_FAILURE_RETRY, used only when the included
   headers did not already define it.

   Evaluates EXPRESSION, converts the result to long int, and re-evaluates
   it for as long as the result is -1 and errno is EINTR.  The value of the
   macro is the last result.  EXPRESSION may therefore be evaluated more
   than once.  Implemented with a GNU statement expression, so it needs a
   compiler that supports that extension.  */
#ifndef TEMP_FAILURE_RETRY
#define TEMP_FAILURE_RETRY(expression) \
( \
({ long int __result; \
do __result = (long int) (expression); \
while (__result == -1L && errno == EINTR); \
__result; }))
#endif
/* Description of one interface address.
   NOTE(review): presumably a copy of glibc's internal struct of the same
   name, kept so the reproducer matches the library code -- confirm.  */
struct in6addrinfo
{
/* Address flags, stored in an 8-bit bit-field.  */
enum {
in6ai_deprecated = 1,
in6ai_homeaddress = 2
} flags:8;
/* Presumably the prefix length of the address, in bits -- confirm.  */
uint8_t prefixlen;
/* Unnamed 16-bit bit-field: explicit padding before the next member.  */
uint16_t :16;
/* Presumably the interface index -- confirm.  */
uint32_t index;
/* Four 32-bit words, i.e. 128 bits of address data.  */
uint32_t addr[4];
};
/* Return the kernel thread ID of the calling thread.

   The ID is obtained by issuing the gettid system call directly through
   syscall(); the raw result is handed back unchanged as a long.  */
long gettid() {
  long tid = syscall(SYS_gettid);

  return tid;
}
/* Send an RTM_GETADDR dump request over the netlink socket FD and read the
   replies until an NLMSG_DONE message has been seen.

   FD   netlink socket the request is sent on and the replies are read from.
   PID  unused; kept so the signature stays compatible with existing callers.

   Every read is preceded by a poll() with a one second timeout.  If poll()
   fails, times out, or returns without POLLIN set for FD, a diagnostic is
   written to stderr and the process is aborted.  A failing sendto() or
   recvmsg(), or a truncated reply, makes the function return silently.  */
static void
make_request (int fd, pid_t pid)
{
  (void) pid;

  struct req
  {
    struct nlmsghdr nlh;
    struct rtgenmsg g;
    /* struct rtgenmsg consists of a single byte.  This means there
       are three bytes of padding included in the REQ definition.
       We make them explicit here.  */
    char pad[3];
  } req;
  struct sockaddr_nl nladdr;

  req.nlh.nlmsg_len = sizeof (req);
  req.nlh.nlmsg_type = RTM_GETADDR;
  req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
  req.nlh.nlmsg_pid = 0;
  req.nlh.nlmsg_seq = time (NULL);
  req.g.rtgen_family = AF_UNSPEC;

  assert (sizeof (req) - offsetof (struct req, pad) == 3);
  memset (req.pad, '\0', sizeof (req.pad));

  memset (&nladdr, '\0', sizeof (nladdr));
  nladdr.nl_family = AF_NETLINK;

  /* The receive buffer is one page.  sysconf() is the public interface for
     the page size; fall back to 4096 should it ever report an error.  */
  long page_size = sysconf (_SC_PAGESIZE);
  const size_t buf_size = page_size > 0 ? (size_t) page_size : 4096;
  char *buf = alloca (buf_size);
  struct iovec iov = { buf, buf_size };

  if (TEMP_FAILURE_RETRY (sendto (fd, (void *) &req, sizeof (req), 0,
                                  (struct sockaddr *) &nladdr,
                                  sizeof (nladdr))) < 0)
    return;

  int done = 0;
  do
    {
      struct msghdr msg =
        {
          .msg_name = (void *) &nladdr,
          .msg_namelen = sizeof (nladdr),
          .msg_iov = &iov,
          .msg_iovlen = 1
        };
      struct pollfd pfd = { .fd = fd, .events = POLLIN, .revents = 0 };

      int pollresult = poll (&pfd, 1, 1000);
      if (pollresult < 0)
        {
          perror ("glibc: check_pf: poll");
          abort ();
        }

      /* The parentheses matter: `==' binds tighter than `&', so without
         them the test would be `revents & (POLLIN == 0)', which is
         always zero.  */
      if (pollresult == 0 || (pfd.revents & POLLIN) == 0)
        {
          fprintf (stderr,
                   "[%ld] glibc: check_pf: netlink socket read timeout\n",
                   gettid ());
          abort ();
        }

      ssize_t read_len = TEMP_FAILURE_RETRY (recvmsg (fd, &msg, 0));
      if (read_len < 0)
        return;

      /* A reply that did not fit into the buffer cannot be parsed.  */
      if (msg.msg_flags & MSG_TRUNC)
        return;

      /* Walk every netlink message contained in this datagram.  */
      struct nlmsghdr *nlmh;
      for (nlmh = (struct nlmsghdr *) buf;
           NLMSG_OK (nlmh, (size_t) read_len);
           nlmh = (struct nlmsghdr *) NLMSG_NEXT (nlmh, read_len))
        {
          if (nlmh->nlmsg_type == NLMSG_DONE)
            /* We found the end, leave the loop.  */
            done = 1;
        }
    }
  while (! done);
}
/* One MiB.  Not referenced anywhere in the visible code.  */
#define STACK_SIZE (1024 * 1024)
/* Number of worker threads started by main().  */
#define NTHREADS 800
/* Forward declarations; both functions are defined after main().  */
void go();
void* repeat(void*);
/* Program entry point: start NTHREADS worker threads that each run
   repeat(), then wait for all of them to finish.

   ARGC and ARGV are unused.

   Returns 0 when every thread was created and joined.  If a thread cannot
   be created, no further threads are started, the ones already running
   are still joined, and EXIT_FAILURE is returned.  Only threads that were
   actually created are joined, because joining an uninitialized pthread_t
   is undefined behavior.  */
int main (int argc, char** argv)
{
  (void) argc;
  (void) argv;

  pthread_t threads[NTHREADS];
  int status = 0;
  int started;

  for (started = 0; started < NTHREADS; started++) {
    /* pthread_create() reports failure through its return value,
       not through errno.  */
    int err = pthread_create(&threads[started], NULL, repeat, NULL);
    if (err != 0) {
      fprintf(stderr, "pthread_create: %s (started %d of %d threads)\n",
              strerror(err), started, NTHREADS);
      status = EXIT_FAILURE;
      break;
    }
  }

  for (int i = 0; i < started; i++) {
    int err = pthread_join(threads[i], NULL);
    if (err != 0) {
      fprintf(stderr, "pthread_join: %s\n", strerror(err));
      status = EXIT_FAILURE;
    }
  }

  return status;
}
/* Thread start routine.

   Calls go() 10000 times in a row, then prints the calling thread's ID
   followed by "exit success" to stdout and flushes the stream.

   IGNORED is the (unused) thread argument.  Always returns NULL.  */
void* repeat(void* ignored) {
  int remaining = 10000;

  while (remaining > 0) {
    go();
    remaining--;
  }

  printf("[%ld] exit success ", gettid());
  fflush(stdout);

  return NULL;
}
/* Run one request cycle on a fresh netlink socket.

   Opens a NETLINK_ROUTE raw socket, binds it to a zeroed AF_NETLINK
   address, reads the resulting local address back with getsockname() and,
   if both calls succeed, passes the socket and the reported nl_pid to
   make_request().  The socket is closed before returning.  A failing
   socket() call makes the function return without doing anything.  */
void go() {
  int sock = socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock < 0)
    return;

  struct sockaddr_nl local;
  socklen_t local_len = sizeof (local);

  memset (&local, '\0', sizeof (local));
  local.nl_family = AF_NETLINK;

  /* getsockname() is attempted only after a successful bind().  */
  int ready = bind (sock, (struct sockaddr *) &local, sizeof (local)) == 0;
  if (ready)
    ready = getsockname (sock, (struct sockaddr *) &local, &local_len) == 0;

  if (ready)
    make_request (sock, local.nl_pid);

  close (sock);
}
@J-cztery

This comment has been minimized.

Copy link

J-cztery commented Aug 12, 2016

@stevenschlansker I would like to thank you for this piece of code and your investigation! Good work!

@mihbeaver

This comment has been minimized.

Copy link

mihbeaver commented Sep 28, 2016

Thank you for the investigation and the reproducer.

@morj

This comment has been minimized.

Copy link

morj commented May 9, 2017

@stevenschlansker thanks for the testing code! How do I determine the kernel version containing the fix for this problem?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.