Skip to content

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
From 8e3070ad5cc17116e47f71290d86b83aa06e98eb Mon Sep 17 00:00:00 2001
From: Timothy J Fontaine <tj.fontaine@joyent.com>
Date: Tue, 12 Aug 2014 00:20:44 +0000
Subject: [PATCH 2/2] descriptions are also 4 byte aligned
---
usr/src/lib/libproc/common/Pcore.c | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c
index 3b26ee5..aa871b5 100644
--- a/usr/src/lib/libproc/common/Pcore.c
+++ b/usr/src/lib/libproc/common/Pcore.c
@@ -1115,7 +1115,7 @@ err:
static int
note_notsup(struct ps_prochandle *P, size_t nbytes)
{
- dprintf("skipping unsupported note type\n");
+ dprintf("skipping unsupported note type, should read %d bytes\n", nbytes);
return (0);
}
@@ -2351,6 +2351,9 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
goto err;
}
break;
+ default:
+ dprintf("Pgrab_core: unknown phdr %d\n", phdr.p_type);
+ break;
}
php = (char *)php + core.e_hdr.e_phentsize;
@@ -2394,7 +2397,10 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
*/
for (nleft = note_phdr.p_filesz; nleft > 0; ) {
Elf64_Nhdr nhdr;
- off64_t off, namesz;
+ off64_t off, namesz, descsz;
+
+ off = lseek64(P->asfd, (off64_t)0L, SEEK_CUR);
+ dprintf("loop start offset is %x\n", off);
/*
* Although <sys/elf.h> defines both Elf32_Nhdr and Elf64_Nhdr
@@ -2417,6 +2423,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
* the name field and the padding to 4-byte alignment.
*/
namesz = P2ROUNDUP((off64_t)nhdr.n_namesz, (off64_t)4);
+
if (lseek64(P->asfd, namesz, SEEK_CUR) == (off64_t)-1) {
dprintf("failed to seek past name and padding\n");
*perr = G_STRANGE;
@@ -2427,23 +2434,28 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
nhdr.n_type, nhdr.n_namesz, nhdr.n_descsz);
off = lseek64(P->asfd, (off64_t)0L, SEEK_CUR);
+ dprintf("current offset is %x\n", off);
/*
* Invoke the note handler function from our table
*/
if (nhdr.n_type < sizeof (nhdlrs) / sizeof (nhdlrs[0])) {
+ dprintf("checking handler for type %d\n", nhdr.n_type);
if (nhdlrs[nhdr.n_type](P, nhdr.n_descsz) < 0) {
dprintf("handler for type %d returned < 0", nhdr.n_type);
*perr = G_NOTE;
goto err;
}
- } else
+ } else {
+ dprintf("we can't handle type %d -- size %d\n", nhdr.n_type, nhdr.n_descsz);
(void) note_notsup(P, nhdr.n_descsz);
+ }
/*
* Seek past the current note data to the next Elf_Nhdr
*/
- if (lseek64(P->asfd, off + nhdr.n_descsz,
+ descsz = P2ROUNDUP((off64_t)nhdr.n_descsz, (off64_t)4);
+ if (lseek64(P->asfd, off + descsz,
SEEK_SET) == (off64_t)-1) {
dprintf("Pgrab_core: failed to seek to next nhdr\n");
*perr = G_STRANGE;
@@ -2454,7 +2466,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
* Subtract the size of the header and its data from what
* we have left to process.
*/
- nleft -= sizeof (nhdr) + namesz + nhdr.n_descsz;
+ nleft -= sizeof (nhdr) + namesz + descsz;
}
if (core_info->in_linux == 1) {
--
1.8.4
From 3e035145701397edac4d47aff5fdc7340f2080b4 Mon Sep 17 00:00:00 2001
From: Timothy J Fontaine <tj.fontaine@joyent.com>
Date: Tue, 12 Nov 2013 23:27:22 +0000
Subject: [PATCH] add support for linux core files
---
usr/src/lib/libproc/common/Pcontrol.h | 1 +
usr/src/lib/libproc/common/Pcore.c | 250 ++++++++++++++++++++++++++++++-
usr/src/lib/libproc/common/Pcore_linux.h | 128 ++++++++++++++++
3 files changed, 374 insertions(+), 5 deletions(-)
create mode 100644 usr/src/lib/libproc/common/Pcore_linux.h
diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h
index 3e72a32..0847d6b 100644
--- a/usr/src/lib/libproc/common/Pcontrol.h
+++ b/usr/src/lib/libproc/common/Pcontrol.h
@@ -167,6 +167,7 @@ typedef struct core_info { /* information specific to core files */
struct ssd *core_ldt; /* LDT entries from core file */
uint_t core_nldt; /* number of LDT entries in core file */
#endif
+ int in_linux;
} core_info_t;
typedef struct elf_file_header { /* extended ELF header */
diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c
index 596c458..3b26ee5 100644
--- a/usr/src/lib/libproc/common/Pcore.c
+++ b/usr/src/lib/libproc/common/Pcore.c
@@ -49,6 +49,7 @@
#include "Pcontrol.h"
#include "P32ton.h"
#include "Putil.h"
+#include "Pcore_linux.h"
/*
* Pcore.c - Code to initialize a ps_prochandle from a core dump. We
@@ -430,6 +431,205 @@ err:
return (-1);
}
+static void
+prpsinfo32_to_psinfo(prpsinfo32 *p32, psinfo_t *psinfo)
+{
+#define X(arg) \
+ psinfo->pr_##arg = p32->pr_##arg
+
+ X(flag);
+ X(pid);
+ X(ppid);
+ X(uid);
+ X(gid);
+ X(sid);
+#undef X
+ psinfo->pr_pgid = p32->pr_pgrp;
+
+ memcpy(psinfo->pr_fname, p32->pr_fname, sizeof(psinfo->pr_fname));
+ memcpy(psinfo->pr_psargs, p32->pr_psargs, sizeof(psinfo->pr_psargs));
+}
+
+static void
+prpsinfo64_to_psinfo(prpsinfo64 *p64, psinfo_t *psinfo)
+{
+#define X(arg) \
+ psinfo->pr_##arg = p64->pr_##arg
+
+ X(flag);
+ X(pid);
+ X(ppid);
+ X(uid);
+ X(gid);
+ X(sid);
+#undef X
+ psinfo->pr_pgid = p64->pr_pgrp;
+
+ memcpy(psinfo->pr_fname, p64->pr_fname, sizeof(psinfo->pr_fname));
+ memcpy(psinfo->pr_psargs, p64->pr_psargs, sizeof(psinfo->pr_psargs));
+}
+
+static int
+note_linux_psinfo(struct ps_prochandle *P, size_t nbytes)
+{
+ core_info_t *core = P->data;
+ prpsinfo32 p32;
+ prpsinfo64 p64;
+
+ core->in_linux = 1;
+
+ if (core->core_dmodel == PR_MODEL_ILP32) {
+ if (nbytes < sizeof(p32) ||
+ read(P->asfd, &p32, sizeof (p32)) != sizeof(p32))
+ goto err;
+
+ prpsinfo32_to_psinfo(&p32, &P->psinfo);
+ } else {
+ if (nbytes < sizeof(p64) ||
+ read(P->asfd, &p64, sizeof (p64)) != sizeof(p64))
+ goto err;
+
+ prpsinfo64_to_psinfo(&p64, &P->psinfo);
+ }
+
+
+ P->status.pr_pid = P->psinfo.pr_pid;
+ P->status.pr_ppid = P->psinfo.pr_ppid;
+ P->status.pr_pgid = P->psinfo.pr_pgid;
+ P->status.pr_sid = P->psinfo.pr_sid;
+
+ P->psinfo.pr_nlwp = 0;
+ P->status.pr_nlwp = 0;
+
+ return 0;
+err:
+ dprintf("Pgrab_core: failed to read NT_PSINFO\n");
+ return (-1);
+}
+
+static void
+prstatus64_to_lwp(prstatus64 *prs64, lwp_info_t *lwp)
+{
+ LTIME_TO_TIMESPEC(lwp->lwp_status.pr_utime, prs64->pr_utime);
+ LTIME_TO_TIMESPEC(lwp->lwp_status.pr_stime, prs64->pr_stime);
+
+/* TODO map signals
+ lwp->lwp_status.pr_cursig = prs64->pr_cursig;
+ lwp->lwp_status.pr_lwppend = prs64->pr_sigpend;
+ lwp->lwp_status.pr_lwphold = prs64->pr_sighold;
+*/
+
+ lwp->lwp_status.pr_reg[REG_R15] = prs64->pr_reg.r15;
+ lwp->lwp_status.pr_reg[REG_R14] = prs64->pr_reg.r14;
+ lwp->lwp_status.pr_reg[REG_R13] = prs64->pr_reg.r13;
+ lwp->lwp_status.pr_reg[REG_R12] = prs64->pr_reg.r12;
+ lwp->lwp_status.pr_reg[REG_R11] = prs64->pr_reg.r11;
+ lwp->lwp_status.pr_reg[REG_R10] = prs64->pr_reg.r10;
+ lwp->lwp_status.pr_reg[REG_R9] = prs64->pr_reg.r9;
+ lwp->lwp_status.pr_reg[REG_R8] = prs64->pr_reg.r8;
+
+ lwp->lwp_status.pr_reg[REG_RDI] = prs64->pr_reg.rdi;
+ lwp->lwp_status.pr_reg[REG_RSI] = prs64->pr_reg.rsi;
+ lwp->lwp_status.pr_reg[REG_RBP] = prs64->pr_reg.rbp;
+ lwp->lwp_status.pr_reg[REG_RBX] = prs64->pr_reg.rbx;
+ lwp->lwp_status.pr_reg[REG_RDX] = prs64->pr_reg.rdx;
+ lwp->lwp_status.pr_reg[REG_RCX] = prs64->pr_reg.rcx;
+ lwp->lwp_status.pr_reg[REG_RAX] = prs64->pr_reg.rax;
+
+ lwp->lwp_status.pr_reg[REG_RIP] = prs64->pr_reg.rip;
+ lwp->lwp_status.pr_reg[REG_CS] = prs64->pr_reg.cs;
+ lwp->lwp_status.pr_reg[REG_RSP] = prs64->pr_reg.rsp;
+ lwp->lwp_status.pr_reg[REG_FS] = prs64->pr_reg.fs;
+ lwp->lwp_status.pr_reg[REG_SS] = prs64->pr_reg.ss;
+ lwp->lwp_status.pr_reg[REG_GS] = prs64->pr_reg.gs;
+ lwp->lwp_status.pr_reg[REG_ES] = prs64->pr_reg.es;
+ lwp->lwp_status.pr_reg[REG_DS] = prs64->pr_reg.ds;
+
+ lwp->lwp_status.pr_reg[REG_GSBASE] = prs64->pr_reg.gs_base;
+ lwp->lwp_status.pr_reg[REG_FSBASE] = prs64->pr_reg.fs_base;
+}
+
+static void
+prstatus32_to_lwp(prstatus32 *prs32, lwp_info_t *lwp)
+{
+ LTIME_TO_TIMESPEC(lwp->lwp_status.pr_utime, prs32->pr_utime);
+ LTIME_TO_TIMESPEC(lwp->lwp_status.pr_stime, prs32->pr_stime);
+
+/* TODO map signals
+ lwp->lwp_status.pr_cursig = prs32->pr_cursig;
+ lwp->lwp_status.pr_lwppend = prs32->pr_sigpend;
+ lwp->lwp_status.pr_lwphold = prs32->pr_sighold;
+*/
+
+#define X(ureg, reg) \
+ lwp->lwp_status.pr_reg[ ureg ] = prs32->pr_reg.reg
+
+ X(EBX, ebx);
+ X(ECX, ecx);
+ X(EDX, edx);
+ X(ESI, esi);
+ X(EDI, edi);
+ X(EBP, ebp);
+ X(EAX, eax);
+ X(EIP, eip);
+ X(ESP, esp);
+
+ X(DS, ds);
+ X(ES, es);
+ X(FS, fs);
+ X(GS, gs);
+ X(CS, cs);
+ X(SS, ss);
+
+ X(EFL, eflags);
+#undef X
+}
+
+static int
+note_linux_prstatus(struct ps_prochandle *P, size_t nbytes)
+{
+ core_info_t *core = P->data;
+
+ prstatus64 prs64;
+ prstatus32 prs32;
+ lwp_info_t *lwp;
+ lwpid_t tid;
+
+ core->in_linux = 1;
+
+ if (core->core_dmodel == PR_MODEL_ILP32) {
+ if (nbytes < sizeof(prs32) ||
+ read(P->asfd, &prs32, sizeof(prs32)) != nbytes)
+ goto err;
+ tid = prs32.pr_pid;
+ } else {
+ if (nbytes < sizeof(prs64) ||
+ read(P->asfd, &prs64, sizeof(prs64)) != nbytes)
+ goto err;
+ tid = prs64.pr_pid;
+ }
+
+ if ((lwp = lwpid2info(P, tid)) == NULL) {
+ dprintf("Pgrab_core: failed to add lwpid2info linux_prstatus\n");
+ return (-1);
+ }
+
+ P->psinfo.pr_nlwp++;
+ P->status.pr_nlwp++;
+
+ lwp->lwp_status.pr_lwpid = tid;
+
+ if (core->core_dmodel == PR_MODEL_ILP32)
+ prstatus32_to_lwp(&prs32, lwp);
+ else
+ prstatus64_to_lwp(&prs64, lwp);
+
+ return 0;
+err:
+ dprintf("Pgrab_core: failed to read NT_PRSTATUS\n");
+ return (-1);
+}
+
static int
note_psinfo(struct ps_prochandle *P, size_t nbytes)
{
@@ -925,9 +1125,9 @@ note_notsup(struct ps_prochandle *P, size_t nbytes)
*/
static int (*nhdlrs[])(struct ps_prochandle *, size_t) = {
note_notsup, /* 0 unassigned */
- note_notsup, /* 1 NT_PRSTATUS (old) */
+ note_linux_prstatus, /* 1 NT_PRSTATUS (old) */
note_notsup, /* 2 NT_PRFPREG (old) */
- note_notsup, /* 3 NT_PRPSINFO (old) */
+ note_linux_psinfo, /* 3 NT_PRPSINFO (old) */
#ifdef __sparc
note_xreg, /* 4 NT_PRXREG */
#else
@@ -1040,7 +1240,7 @@ core_add_mapping(struct ps_prochandle *P, GElf_Phdr *php)
core_info_t *core = P->data;
prmap_t pmap;
- dprintf("mapping base %llx filesz %llu memsz %llu offset %llu\n",
+ dprintf("mapping base %llx filesz %llx memsz %llx offset %llx\n",
(u_longlong_t)php->p_vaddr, (u_longlong_t)php->p_filesz,
(u_longlong_t)php->p_memsz, (u_longlong_t)php->p_offset);
@@ -2164,7 +2364,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
* If we couldn't find anything of type PT_NOTE, or only one PT_NOTE
* was present, abort. The core file is either corrupt or too old.
*/
- if (notes == 0 || notes == 1) {
+ if (notes == 0) {
*perr = G_NOTE;
goto err;
}
@@ -2233,6 +2433,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
*/
if (nhdr.n_type < sizeof (nhdlrs) / sizeof (nhdlrs[0])) {
if (nhdlrs[nhdr.n_type](P, nhdr.n_descsz) < 0) {
+ dprintf("handler for type %d returned < 0", nhdr.n_type);
*perr = G_NOTE;
goto err;
}
@@ -2256,6 +2457,44 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
nleft -= sizeof (nhdr) + namesz + nhdr.n_descsz;
}
+ if (core_info->in_linux == 1) {
+ size_t tcount, pid, tid;
+ lwp_info_t *lwp;
+
+ dprintf("linux core_dmodel = %s\n", core_info->core_dmodel == PR_MODEL_ILP32 ? "ia32" : "amd64");
+ P->status.pr_dmodel = core_info->core_dmodel;
+ /* core_info->core_content |= CC_CONTENT_TEXT; */
+
+ pid = P->status.pr_pid;
+
+ if((lwp = lwpid2info(P, pid)) == NULL) {
+ dprintf("Couldn't find first thread\n");
+ *perr = G_STRANGE;
+ goto err;
+ }
+
+ /* set representative thread */
+ memcpy(&P->status.pr_lwp, &lwp->lwp_status, sizeof(P->status.pr_lwp));
+
+ lwp = list_next(&core_info->core_lwp_head);
+
+ /*
+ * things like mdb v8 expect the first thread to actually have an id
+ * of 1, on linux that is actually the pid -- so if our tid matches our pid
+ * set it as 1, otherwise count up from there.
+ */
+ for (tid = 2, tcount = 0; tcount < core_info->core_nlwp; tcount++, lwp = list_next(lwp)) {
+ if (lwp->lwp_id == P->psinfo.pr_pid) {
+ lwp->lwp_id = 1;
+ lwp->lwp_status.pr_lwpid = 1;
+ } else {
+ lwp->lwp_id = tid;
+ lwp->lwp_status.pr_lwpid = tid;
+ ++tid;
+ }
+ }
+ }
+
if (nleft != 0) {
dprintf("Pgrab_core: note section malformed\n");
*perr = G_STRANGE;
@@ -2375,7 +2614,8 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
* If we're a statically linked executable, then just locate the
* executable's text and data and name them after the executable.
*/
- if (base_addr == (uintptr_t)-1L) {
+ if (base_addr == (uintptr_t)-1L || core_info->in_linux) {
+ dprintf("looking for text and data: %s\n", execname);
map_info_t *tmp, *dmp;
file_info_t *fp;
rd_loadobj_t rl;
diff --git a/usr/src/lib/libproc/common/Pcore_linux.h b/usr/src/lib/libproc/common/Pcore_linux.h
new file mode 100644
index 0000000..2bf1f4b
--- /dev/null
+++ b/usr/src/lib/libproc/common/Pcore_linux.h
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _PCORE_LINUX_H
+#define _PCORE_LINUX_H
+
+typedef struct prpsinfo_32 { /* Information about process */
+ unsigned char pr_state; /* Numeric process state */
+ char pr_sname; /* Char for pr_state */
+ unsigned char pr_zomb; /* Zombie */
+ signed char pr_nice; /* Nice val */
+ unsigned long pr_flag; /* Flags */
+ uint16_t pr_uid; /* User ID */
+ uint16_t pr_gid; /* Group ID */
+ pid_t pr_pid; /* Process ID */
+ pid_t pr_ppid; /* Parent's process ID */
+ pid_t pr_pgrp; /* Group ID */
+ pid_t pr_sid; /* Session ID */
+ char pr_fname[16]; /* Filename of executable */
+ char pr_psargs[80]; /* Initial part of arg list */
+} prpsinfo32;
+
+typedef struct prpsinfo_64 { /* Information about process */
+ unsigned char pr_state; /* Numeric process state */
+ char pr_sname; /* Char for pr_state */
+ unsigned char pr_zomb; /* Zombie */
+ signed char pr_nice; /* Nice val */
+ unsigned long pr_flag; /* Flags */
+ uint32_t pr_uid; /* User ID */
+ uint32_t pr_gid; /* Group ID */
+ pid_t pr_pid; /* Process ID */
+ pid_t pr_ppid; /* Parent's process ID */
+ pid_t pr_pgrp; /* Group ID */
+ pid_t pr_sid; /* Session ID */
+ char pr_fname[16]; /* Filename of executable */
+ char pr_psargs[80]; /* Initial part of arg list */
+} prpsinfo64;
+
+typedef struct x64_regs {
+ uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10;
+ uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+ uint64_t rip,cs,eflags;
+ uint64_t rsp,ss;
+ uint64_t fs_base, gs_base;
+ uint64_t ds,es,fs,gs;
+} x64_regs;
+
+typedef struct ia32_regs {
+ uint32_t ebx, ecx, edx, esi, edi, ebp, eax;
+ uint16_t ds, __ds, es, __es;
+ uint16_t fs, __fs, gs, __gs;
+ uint32_t orig_eax, eip;
+ uint16_t cs, __cs;
+ uint32_t eflags, esp;
+ uint16_t ss, __ss;
+} ia32_regs;
+
+typedef struct elf_siginfo { /* Information about signal (unused) */
+ int32_t si_signo; /* Signal number */
+ int32_t si_code; /* Extra code */
+ int32_t si_errno; /* Errno */
+} elf_siginfo;
+
+typedef struct elf_timeval { /* Time value with microsecond resolution */
+ long tv_sec; /* Seconds */
+ long tv_usec; /* Microseconds */
+} elf_timeval;
+
+typedef struct prstatus32 { /* Information about thread; includes CPU reg*/
+ elf_siginfo pr_info; /* Info associated with signal */
+ uint16_t pr_cursig; /* Current signal */
+ unsigned long pr_sigpend; /* Set of pending signals */
+ unsigned long pr_sighold; /* Set of held signals */
+ pid_t pr_pid; /* Process ID */
+ pid_t pr_ppid; /* Parent's process ID */
+ pid_t pr_pgrp; /* Group ID */
+ pid_t pr_sid; /* Session ID */
+ elf_timeval pr_utime; /* User time */
+ elf_timeval pr_stime; /* System time */
+ elf_timeval pr_cutime; /* Cumulative user time */
+ elf_timeval pr_cstime; /* Cumulative system time */
+ ia32_regs pr_reg; /* CPU registers */
+ uint32_t pr_fpvalid; /* True if math co-processor being used */
+} prstatus32;
+
+typedef struct prstatus64 { /* Information about thread; includes CPU reg*/
+ elf_siginfo pr_info; /* Info associated with signal */
+ uint16_t pr_cursig; /* Current signal */
+ unsigned long pr_sigpend; /* Set of pending signals */
+ unsigned long pr_sighold; /* Set of held signals */
+ pid_t pr_pid; /* Process ID */
+ pid_t pr_ppid; /* Parent's process ID */
+ pid_t pr_pgrp; /* Group ID */
+ pid_t pr_sid; /* Session ID */
+ elf_timeval pr_utime; /* User time */
+ elf_timeval pr_stime; /* System time */
+ elf_timeval pr_cutime; /* Cumulative user time */
+ elf_timeval pr_cstime; /* Cumulative system time */
+ x64_regs pr_reg; /* CPU registers */
+ uint32_t pr_fpvalid; /* True if math co-processor being used */
+} prstatus64;
+
+#define LTIME_TO_TIMESPEC(dst, src) \
+ (dst).tv_sec = (src).tv_sec; \
+ (dst).tv_nsec = (src).tv_usec * 1000;
+
+#endif /* _PCORE_LINUX_H */
--
1.8.0.1
@metamatt

I find that with the libproc64.so that Joyent Manta provides (as /assets/NodeCore/public/linux-cores/libproc64.so), I can read some Linux-originated node cores and not others. For the ones that I can't read:

  • mdb says mdb: cannot debug ./core.node.0.47cf1c27534e49248830073970c4e92b.1135.1422906020000000: core file is corrupt or missing required data
  • if I set LIBPROC_DEBUG=1, and run mdb on both good and bad core files, the output in terms of libproc DEBUG: Note hdr and libproc DEBUG: skipping unsupported note type is about the same until they reach n_type 1179208773 (0x46494c45 or 'FILE'). For one such core, the note header is unpacked as libproc DEBUG: Note hdr n_type=1179208773 n_namesz=5 n_descsz=5015. (Note the n_descsz value is odd, and for cores I can read, it's even. I think this is the salient part.)
  • the following libproc DEBUG output is garbage, indicating (I think) it became desynchronized from the input stream, having skipped the wrong number of bytes in some variable-sized struct
  • back on Linux, I can use readelf and objdump to look at that FILE note and it looks fine according to these tools but indeed has a non-word-aligned size:
matt@matt-dev ~/> readelf -n ~/core.node.0.47cf1c27534e49248830073970c4e92b.1135.1422906020000000

Displaying notes found at file offset 0x00002a40 with length 0x000023b8:
  Owner                 Data size   Description
  CORE                 0x00000150   NT_PRSTATUS (prstatus structure)
  CORE                 0x00000088   NT_PRPSINFO (prpsinfo structure)
  CORE                 0x00000080   NT_SIGINFO (siginfo_t data)
  CORE                 0x00000130   NT_AUXV (auxiliary vector)
  CORE                 0x00001397   NT_FILE (mapped files)
 ... (a bunch of valid filenames follow)

So the core notes are 0x23b8 (decimal 9144) bytes long, of which the FILE note accounts for 0x1397 (decimal 5015) bytes.

I assume the problem is that this patch assumes the notes are word-aligned and is rounding 5015 up to 5016 when skipping this note (i.e. the P2ROUNDUP at line 79 is wrong in at least some cases). But I don't know where to get the source and tools to build this patch myself to play with fixing it.

@bcantrill

Interesting; would you mind making a core file available that exhibits this property?

@metamatt

Done (the core file I have isn't suitable for public consumption so I gave it directly to @bcantrill). Curious to hear what you figure out. (FWIW I tried to make progress on this myself, checked out https://github.com/joyent/illumos-joyent which has usr/src/lib/libproc/common/Pcore.c which I presume this patch goes atop, but the build process for that is... not exactly newb-friendly.)

@metamatt

Other way around. The FILE resource is a bunch of strings whose size may or may not add up to a multiple of 4. It's padded to 4 bytes, though. On Linux (Ubuntu 14), objdump -h shows the FILE note as, for example:

core.node.0.46218c4cf70044eb8b45dddd344a80b1.653.1426705534000000:     file format elf64-x86-64

Sections:
Idx Name          Size      VMA               LMA               File off  Algn
  6 .note.linuxcore.file/653 0000176b  0000000000000000  0000000000000000  00002844  2**2
                  CONTENTS
  7 .note.linuxcore.file 0000176b  0000000000000000  0000000000000000  00002844  2**2
                  CONTENTS

(I don't know why it's shown twice). Note 0x176b is not a multiple of 4, but it's marked as "align: 4".

For this corefile, I found the size field (in byte order, 6B 17 at offset 0x2834) and changed that to 6C 17 with a hex editor, and mdb + the patched libproc can read it. So, I think the libproc needs another ROUNDUP somewhere.

I don't know the prior history of this patch; you can tell here it was pasted from an email thread with subject "[PATCH 2/2] descriptions are also 4 byte aligned"; looks like there was some previous trial/error with alignment, and at least one more iteration is needed? Or, the patch here has already corrected that issue and the libproc64.so binary that I got from Manta's /assets is stale.

@metamatt
"""Round the NT_FILE note's recorded size in a Linux core file up to 4 bytes.

Usage: <script> COREFILE

The script asks ``objdump -h`` for the ``.note.linuxcore.file`` section,
computes the section size rounded up to a multiple of 4, and overwrites the
4-byte size field stored just ahead of the note data with that rounded value.
The core file is modified IN PLACE.
"""
import struct
import subprocess
import sys

# Section name objdump reports for the NT_FILE note.
FILE_NOTE_SECTION = '.note.linuxcore.file'

# Distance from the start of the note data back to the 4-byte size field.
# NOTE(review): 16 matches a note header laid out as namesz(4) descsz(4)
# type(4) followed by a 5-byte owner name padded to 8 -- confirm for cores
# whose note owner name has a different length.
DESCSZ_BACKOFF = 16


def round_up_to_word(size, word=4):
    """Return ``size`` rounded up to the next multiple of ``word``."""
    # Floor division keeps the result an int on both Python 2 and Python 3.
    return ((size + word - 1) // word) * word


def find_file_note(objdump_output):
    """Locate the NT_FILE note in ``objdump -h`` text.

    Returns ``(size, file_offset)`` parsed from the first line whose section
    name is exactly ``.note.linuxcore.file``, or ``None`` when no such line
    exists.
    """
    for line in objdump_output.split('\n'):
        parts = line.split()
        # Columns used: [1] name, [2] size (hex), [5] file offset (hex).
        if len(parts) > 5 and parts[1] == FILE_NOTE_SECTION:
            return int(parts[2], 16), int(parts[5], 16)
    return None


def main(argv):
    """Patch the core file named by ``argv[1]``; return a process exit code."""
    if len(argv) < 2:
        sys.stderr.write("usage: %s corefile\n" % argv[0])
        return 2
    corefilename = argv[1]

    # universal_newlines=True yields text (not bytes) on Python 3 as well.
    sections = subprocess.check_output(['objdump', '-h', corefilename],
                                       universal_newlines=True)
    note = find_file_note(sections)
    if note is None:
        sys.stderr.write("no %s section found in %s\n"
                         % (FILE_NOTE_SECTION, corefilename))
        return 1
    filenotesize, filenoteoffset = note

    goodsize = round_up_to_word(filenotesize)
    print("offset %s size %s should be %s"
          % (hex(filenoteoffset), hex(filenotesize), hex(goodsize)))

    with open(corefilename, 'rb+') as corefile:
        corefile.seek(filenoteoffset - DESCSZ_BACKOFF)
        # 'I' uses the host's native byte order.
        # NOTE(review): assumes the host and the core file share endianness.
        badsize = struct.unpack('I', corefile.read(4))[0]
        # Refuse to write unless the field really holds the size objdump
        # reported; an explicit check survives `python -O`, unlike assert.
        if badsize != filenotesize:
            sys.stderr.write("size field holds %s, expected %s; not modifying\n"
                             % (hex(badsize), hex(filenotesize)))
            return 1
        corefile.seek(filenoteoffset - DESCSZ_BACKOFF)
        corefile.write(struct.pack('I', goodsize))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.