Created
September 24, 2015 23:58
-
-
Save stevenschlansker/6ad46c5ccb22bc4f3473 to your computer and use it in GitHub Desktop.
Reproduction case for glibc netlink hang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
socket(PF_NETLINK, SOCK_RAW, 0) = 78 | |
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 | |
getsockname(78, {sa_family=AF_NETLINK, pid=-1328743, groups=00000000}, [12]) = 0 | |
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\231\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\231\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\231\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20 | |
close(78) = 0 | |
socket(PF_NETLINK, SOCK_RAW, 0) = 71 | |
bind(71, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 | |
getsockname(71, {sa_family=AF_NETLINK, pid=-1328749, groups=00000000}, [12]) = 0 | |
sendto(71, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20 | |
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}]) | |
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\223\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156 | |
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}]) | |
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\223\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144 | |
poll([{fd=71, events=POLLIN}], 1, 1000) = 1 ([{fd=71, revents=POLLIN}]) | |
recvmsg(71, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\223\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20 | |
close(71) = 0 | |
socket(PF_NETLINK, SOCK_RAW, 0) = 78 | |
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 | |
getsockname(78, {sa_family=AF_NETLINK, pid=-1328759, groups=00000000}, [12]) = 0 | |
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V\211\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V\211\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V\211\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20 | |
close(78) = 0 | |
socket(PF_NETLINK, SOCK_RAW, 0) = 78 | |
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 | |
getsockname(78, {sa_family=AF_NETLINK, pid=-1328771, groups=00000000}, [12]) = 0 | |
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"L\0\0\0\24\0\2\0O\206\4V}\271\353\377\2\10\200\376\1\0\0\0\10\0\1\0\177\0\0\1"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 156 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"H\0\0\0\24\0\2\0O\206\4V}\271\353\377\n\200\200\376\1\0\0\0\24\0\1\0\0\0\0\0"..., 4096}], msg_controllen=0, msg_flags=0}, 0) = 144 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 1 ([{fd=78, revents=POLLIN}]) | |
recvmsg(78, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\24\0\0\0\3\0\2\0O\206\4V}\271\353\377\0\0\0\0", 4096}], msg_controllen=0, msg_flags=0}, 0) = 20 | |
close(78) = 0 | |
socket(PF_NETLINK, SOCK_RAW, 0) = 78 | |
bind(78, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 | |
getsockname(78, {sa_family=AF_NETLINK, pid=-1328781, groups=00000000}, [12]) = 0 | |
sendto(78, "\24\0\0\0\26\0\1\3O\206\4V\0\0\0\0\0\0\0\0", 20, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 20 | |
poll([{fd=78, events=POLLIN}], 1, 1000) = 0 (Timeout) | |
gettid() = 9370 | |
write(2, "[9370] glibc: check_pf: netlink "..., 52) = 52 | |
rt_sigprocmask(SIG_UNBLOCK, [ABRT], NULL, 8) = 0 | |
tgkill(8599, 9370, SIGABRT) = 0 | |
--- SIGABRT {si_signo=SIGABRT, si_code=SI_TKILL, si_pid=8599, si_uid=0} --- | |
+++ killed by SIGABRT (core dumped) +++ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Determine protocol families for which interfaces exist. Linux version. | |
Copyright (C) 2003-2014 Free Software Foundation, Inc. | |
This file is part of the GNU C Library. | |
The GNU C Library is free software; you can redistribute it and/or | |
modify it under the terms of the GNU Lesser General Public | |
License as published by the Free Software Foundation; either | |
version 2.1 of the License, or (at your option) any later version. | |
The GNU C Library is distributed in the hope that it will be useful, | |
but WITHOUT ANY WARRANTY; without even the implied warranty of | |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
Lesser General Public License for more details. | |
You should have received a copy of the GNU Lesser General Public | |
License along with the GNU C Library; if not, see | |
<http://www.gnu.org/licenses/>. */ | |
#include <stdio.h> | |
#include <assert.h> | |
#include <errno.h> | |
#include <ifaddrs.h> | |
#include <netdb.h> | |
#include <stddef.h> | |
#include <string.h> | |
#include <time.h> | |
#include <unistd.h> | |
#include <alloca.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <sys/socket.h> | |
#include <poll.h> | |
#include <sched.h> | |
#include <pthread.h> | |
#include <sys/syscall.h> | |
#include <asm/types.h> | |
#include <linux/netlink.h> | |
#include <linux/rtnetlink.h> | |
#include <bits/libc-lock.h> | |
/* Fallback values for address flags that the installed kernel headers
   may not define.  */
#if !defined IFA_F_HOMEADDRESS
# define IFA_F_HOMEADDRESS 0
#endif
#if !defined IFA_F_OPTIMISTIC
# define IFA_F_OPTIMISTIC 0
#endif

/* Evaluate EXPRESSION again and again for as long as it yields -1 while
   errno is EINTR; the value of the macro is the last result, converted
   to long int.  Only defined here when no header has provided it.
   Relies on the GNU statement-expression extension.  */
#if !defined TEMP_FAILURE_RETRY
# define TEMP_FAILURE_RETRY(expression)                          \
  ({                                                             \
    long int tfr_value_;                                         \
    do                                                           \
      tfr_value_ = (long int) (expression);                      \
    while (tfr_value_ == -1L && errno == EINTR);                 \
    tfr_value_;                                                  \
  })
#endif
/* Description of one interface address.  The first four bytes are
   spelled out explicitly (flags, prefix length, 16 unnamed padding
   bits) so that INDEX follows without implicit padding.  */
struct in6addrinfo
{
  /* Flag bits, stored in an 8-bit field.  */
  enum
  {
    in6ai_deprecated = 1 << 0,
    in6ai_homeaddress = 1 << 1
  } flags:8;
  /* Prefix length of the address.  */
  uint8_t prefixlen;
  /* Unnamed 16-bit filler.  */
  uint16_t :16;
  /* Interface index.  */
  uint32_t index;
  /* Address storage as four 32-bit words.  */
  uint32_t addr[4];
};
/* Return the id of the calling thread as reported by the raw
   SYS_gettid system call.  */
long gettid() {
    const long tid = syscall(SYS_gettid);
    return tid;
}
/* Send one RTM_GETADDR request (NLM_F_ROOT | NLM_F_MATCH, family
   AF_UNSPEC) on the netlink socket FD and read replies until a
   message of type NLMSG_DONE has been seen.

   Before every read the socket is polled for up to one second.  If
   poll fails, times out, or returns without POLLIN set, a diagnostic
   is written to stderr and the process is aborted -- that abort is how
   this reproducer reports a hung netlink socket.

   A failed sendto/recvmsg, a truncated reply, or an unusable page size
   makes the function return silently.  PID is accepted but not used.  */
static void
make_request (int fd, pid_t pid)
{
  /* How long to wait for the socket to become readable.  */
  enum { POLL_TIMEOUT_MS = 1000 };

  (void) pid;

  struct req
  {
    struct nlmsghdr nlh;
    struct rtgenmsg g;
    /* struct rtgenmsg consists of a single byte.  This means there
       are three bytes of padding included in the REQ definition.
       We make them explicit here.  */
    char pad[3];
  } req;
  struct sockaddr_nl nladdr;

  req.nlh.nlmsg_len = sizeof (req);
  req.nlh.nlmsg_type = RTM_GETADDR;
  req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
  req.nlh.nlmsg_pid = 0;
  req.nlh.nlmsg_seq = time (NULL);
  req.g.rtgen_family = AF_UNSPEC;

  assert (sizeof (req) - offsetof (struct req, pad) == 3);
  memset (req.pad, '\0', sizeof (req.pad));

  memset (&nladdr, '\0', sizeof (nladdr));
  nladdr.nl_family = AF_NETLINK;

  /* The receive buffer is one page, taken from the stack.  Use the
     public sysconf interface rather than a libc-internal symbol.  */
  const long page_size = sysconf (_SC_PAGESIZE);
  if (page_size <= 0)
    return;
  const size_t buf_size = (size_t) page_size;
  char *buf = alloca (buf_size);
  struct iovec iov = { buf, buf_size };

  if (TEMP_FAILURE_RETRY (sendto (fd, (void *) &req, sizeof (req), 0,
                                  (struct sockaddr *) &nladdr,
                                  sizeof (nladdr))) < 0)
    return;

  int done = 0;
  do
    {
      /* Designated initializers keep this correct regardless of the
         field order of struct msghdr.  */
      struct msghdr msg =
        {
          .msg_name = (void *) &nladdr,
          .msg_namelen = sizeof (nladdr),
          .msg_iov = &iov,
          .msg_iovlen = 1,
          .msg_control = NULL,
          .msg_controllen = 0,
          .msg_flags = 0
        };

      struct pollfd pfd = { .fd = fd, .events = POLLIN, .revents = 0 };
      int pollresult = poll (&pfd, 1, POLL_TIMEOUT_MS);
      if (pollresult < 0)
        {
          perror ("glibc: check_pf: poll");
          abort ();
        }
      /* The parentheses matter: `==' binds tighter than `&', so without
         them the readiness test would always evaluate to false.  */
      if (pollresult == 0 || (pfd.revents & POLLIN) == 0)
        {
          fprintf (stderr,
                   "[%ld] glibc: check_pf: netlink socket read timeout\n",
                   gettid ());
          abort ();
        }

      ssize_t read_len = TEMP_FAILURE_RETRY (recvmsg (fd, &msg, 0));
      if (read_len < 0)
        return;

      if (msg.msg_flags & MSG_TRUNC)
        return;

      /* Walk every message in the datagram; NLMSG_NEXT decrements
         READ_LEN as it advances.  */
      struct nlmsghdr *nlmh;
      for (nlmh = (struct nlmsghdr *) buf;
           NLMSG_OK (nlmh, (size_t) read_len);
           nlmh = (struct nlmsghdr *) NLMSG_NEXT (nlmh, read_len))
        {
          if (nlmh->nlmsg_type == NLMSG_DONE)
            /* We found the end, leave the loop.  */
            done = 1;
        }
    }
  while (! done);
}
/* 1 MiB; not referenced by any code visible in this file.  */
#define STACK_SIZE (1 << 20)
/* Number of worker threads that main starts.  */
#define NTHREADS 800

/* Forward declarations for the worker routines defined after main.  */
void *repeat(void *);
void go();
int main (int argc, char** argv) | |
{ | |
int i; | |
pthread_t threads[NTHREADS]; | |
for (i = 0; i < NTHREADS; i++) { | |
pthread_create(threads + i, NULL, repeat, NULL); | |
} | |
for (i = 0; i < NTHREADS; i++) { | |
pthread_join(threads[i], NULL); | |
} | |
return 0; | |
} | |
/* Thread body: call go() 10000 times, then print this thread's id and
   a success marker to stdout and flush it.  The argument is unused and
   the return value is always NULL.  */
void* repeat(void* ignored) {
    int remaining = 10000;

    (void) ignored;
    while (remaining > 0) {
        go();
        remaining--;
    }

    const long tid = gettid();
    printf("[%ld] exit success ", tid);
    fflush(stdout);
    return NULL;
}
/* One iteration of the reproducer: open a NETLINK_ROUTE socket, bind
   it with a zeroed address, read the assigned address back, and hand
   the socket to make_request().  Failures of socket, bind or
   getsockname are skipped silently; the socket is closed whenever it
   was opened.  */
void go() {
    const int sock = socket (PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (sock < 0)
        return;

    struct sockaddr_nl local;
    socklen_t local_len = sizeof (local);

    memset (&local, '\0', sizeof (local));
    local.nl_family = AF_NETLINK;

    /* getsockname is attempted only after a successful bind. */
    if (bind (sock, (struct sockaddr *) &local, sizeof (local)) == 0) {
        if (getsockname (sock, (struct sockaddr *) &local, &local_len) == 0)
            make_request (sock, local.nl_pid);
    }

    close (sock);
}
Thank you for the investigation and the reproducer.
@stevenschlansker thanks for the testing code! How do I determine the kernel version containing the fix for this problem?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@stevenschlansker I would like to thank you for this piece of code and your investigation! Good work!