Skip to content

Instantly share code, notes, and snippets.

@thebabush
Forked from jasonk000/00 contents
Created April 3, 2020 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thebabush/bbf24cdc6ad007f1964a20eb8c20a0e9 to your computer and use it in GitHub Desktop.
Save thebabush/bbf24cdc6ad007f1964a20eb8c20a0e9 to your computer and use it in GitHub Desktop.
improve jitdump support when mmap'd jit region expands for kdab hotspot & perf
Linux perf fixes
01 overview
02 patch to make linux perf report work cleanly with jitdump remap
03 perf script for sample input file showing mmap overwrite
04 perf report before example
05 perf report after example
--
KDAB Hotspot specific fixes
06 patch to make linux perf output program headers for KDAB hotspot
07 patch to make hotspot UI load cleanly with jitdump remap
Jitdump support on the JVM is available, but it does not work completely under a busy workload.
Specifically, the JVM can expand its code cache region between compiler invocations. Perf record happily
emits a new mmap record specifying the new anonymous memory region. Downstream, the perf inject process
reads the jitdump recording and inserts code_load records into the stream. However, it does not account
for the fact that existing code_load records will be overwritten by the new anonymous region mapping.
Subsequent perf reports will read the code_load mmap records followed by an mmap region expansion of the
JIT area and will destroy previous code_load mappings. This seems correct since, according to mmap(2), with MAP_FIXED
any previously mapped pages are discarded. The end result is that only the code_load records since the
most recent JIT area expansion are reported.
In addition, KDAB hotspot relies on a more complete ELF file in order to be able to read and load in the
jitdump generated ELF files. Specifically, it relies on the inclusion of a program header within the file.
With both of these fixes, we can take a clean jitdump profile of the JVM and perf record it, and then perf
report will output the files. KDAB hotspot can then open and view them.
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 70a9f8716..67c7d23d9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1694,6 +1694,11 @@ int machine__process_mmap2_event(struct machine *machine,
if (thread == NULL)
goto out_problem;
+ if (!machine->is_jitdump && strstr(event->mmap2.filename, "/jitted-") != NULL) {
+ machine->is_jitdump = true;
+ dump_printf("file has jitted- records, assuming perf inject -j");
+ }
+
map = map__new(machine, event->mmap2.start,
event->mmap2.len, event->mmap2.pgoff,
event->mmap2.maj,
@@ -1701,7 +1706,8 @@ int machine__process_mmap2_event(struct machine *machine,
event->mmap2.ino_generation,
event->mmap2.prot,
event->mmap2.flags,
- event->mmap2.filename, thread);
+ event->mmap2.filename, thread,
+ !machine->is_jitdump);
if (map == NULL)
goto out_problem_map;
@@ -1750,11 +1756,16 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
prot = PROT_EXEC;
+ if (!machine->is_jitdump && strstr(event->mmap.filename, "/jitted-") != NULL) {
+ machine->is_jitdump = true;
+ dump_printf("file has jitted- records, assuming perf inject -j");
+ }
+
map = map__new(machine, event->mmap.start,
event->mmap.len, event->mmap.pgoff,
0, 0, 0, 0, prot, 0,
event->mmap.filename,
- thread);
+ thread, !machine->is_jitdump);
if (map == NULL)
goto out_problem_map;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 18e13c0cc..9d24c0696 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -59,6 +59,7 @@ struct machine {
u64 db_id;
};
bool trampolines_mapped;
+ bool is_jitdump;
};
static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index eec9b282c..0ba6a9b14 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -148,7 +148,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
u64 ino_gen, u32 prot, u32 flags, char *filename,
- struct thread *thread)
+ struct thread *thread, bool create_anon_map)
{
struct map *map = malloc(sizeof(*map));
struct nsinfo *nsi = NULL;
@@ -173,6 +173,10 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
nsi = nsinfo__get(thread->nsinfo);
if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
+ if (!create_anon_map) {
+ goto out_delete;
+ }
+
snprintf(newfilename, sizeof(newfilename),
"/tmp/perf-%d.map", nsi->pid);
filename = newfilename;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index c3614195d..8171678d5 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -113,7 +113,8 @@ void map__init(struct map *map,
struct map *map__new(struct machine *machine, u64 start, u64 len,
u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
u64 ino_gen, u32 prot, u32 flags,
- char *filename, struct thread *thread);
+ char *filename, struct thread *thread,
+ bool create_anon_map);
struct map *map__new2(u64 start, struct dso *dso);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
# Here we can see two examples for C2 Compiler Thread:
# In both cases the area expanded starts at the same base address and has an expanded size.
# With the current approach, this overwrites any previous mapping at the same address which is
# in fact the correct behaviour for a real over-map.
# perf script --show-mmap-events -i perf.jit.data 2>&1 | grep -e MMAP | grep -A2 -B2 C2 after.txt
... lots of output ...
--
:-370915584 -370915584 1133505.974309: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d7c5c0(0x15c0) @ 0x40 103:01 10258139 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3313.so
:-370915584 -370915584 1133505.974309: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d7c5c0(0x15c0) @ 0x40 103:01 10258139 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3313.so
C2 CompilerThre 28194 1133506.017608: PERF_RECORD_MMAP2 28175/28194: [0x7f5d49000000(0xd90000) @ 0x7f5d49000000 00:00 0 0]: rwxp //anon
:-370915584 -370915584 1133506.017829: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d7f920(0xfc0) @ 0x40 103:01 10258140 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3314.so
:-370915584 -370915584 1133506.017829: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d7f920(0xfc0) @ 0x40 103:01 10258140 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3314.so
--
:-370915584 -370915584 1133506.415405: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d492ba060(0x32c0) @ 0x40 103:01 10258266 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3440.so
:-370915584 -370915584 1133506.415405: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d492ba060(0x32c0) @ 0x40 103:01 10258266 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3440.so
C2 CompilerThre 28200 1133506.415970: PERF_RECORD_MMAP2 28175/28200: [0x7f5d49000000(0xda0000) @ 0x7f5d49000000 00:00 0 0]: rwxp //anon
:-370915584 -370915584 1133506.416977: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d898a0(0x1b00) @ 0x40 103:01 10258267 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3441.so
:-370915584 -370915584 1133506.416977: PERF_RECORD_MMAP2 28175/-370915584: [0x7f5d49d898a0(0x1b00) @ 0x40 103:01 10258267 1]: --xs /home/jkoch/.debug/jit/java-jit-20191024-1824.RDQhZP/jitted-28175-3441.so
...
#### note there are still some frames in this perf output which are correctly jitdump mapped
#### also note the "Failed to open" for the legacy map file, which is not in the 'after' report
[vdso] with build id e3eaed8b06b574d24636b4bf459e195e15b3cb9c not found, continuing without symbols
Failed to open /tmp/perf-28175.map, continuing without symbols
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 98K of event 'cpu-clock'
# Event count (approx.): 24583250000
#
# Children Self Command Shared Object Symbol
# ........ ........ ............... .................... ..............................................................................................
#
46.67% 0.00% ForkJoinPool.co libpthread-2.27.so [.] start_thread
|
---start_thread
_ZL10java_startP6Thread
_ZN10JavaThread17thread_main_innerEv
_ZL12thread_entryP10JavaThreadP6Thread
_ZN9JavaCalls12call_virtualEP9JavaValue6Handle11KlassHandleP6SymbolS5_P6Thread
_ZN9JavaCalls12call_virtualEP9JavaValue11KlassHandleP6SymbolS4_P17JavaCallArgumentsP6Thread
_ZN9JavaCalls11call_helperEP9JavaValueP12methodHandleP17JavaCallArgumentsP6Thread
|
|--46.14%--0x7f5d490004e0
| 0x7f5d49007fd4
| |
| --46.02%--0x7f5d49007fd4
| |
| |--36.96%--0x7f5d49988194
| | |
| | |--36.25%--0x7f5d49977c4c
| | | |
| | | --36.24%--0x7f5d49979404
| | | |
| | | |--29.45%--0x7f5d49a8410c
| | | | |
| | | | --29.44%--0x7f5d49a59224
| | | | |
| | | | --29.31%--0x7f5d49a63318
| | | | |
| | | | |--10.80%--0x7f5d49a645a4
| | | | | |
| | | | | --6.73%--org.eclipse.mat.hprof.HprofPars
| | | | |
| | | | |--5.73%--0x7f5d49ac82c8
| | | | | |
| | | | | --4.10%--0x7f5d499d8290
| | | | | |
| | | | | --4.08%--0x7f5d490ff90b
| | | | | |
| | | | | --4.07%--_ZN13Shar
| | | | | |
| | | | | --3.91%
#### All of the jitdump output is now available
[vdso] with build id e3eaed8b06b574d24636b4bf459e195e15b3cb9c not found, continuing without symbols
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 98K of event 'cpu-clock'
# Event count (approx.): 24583250000
#
# Children Self Command Shared Object Symbol
# ........ ........ ............... .................... ..............................................................................................
#
46.67% 0.00% ForkJoinPool.co libpthread-2.27.so [.] start_thread
|
---start_thread
_ZL10java_startP6Thread
_ZN10JavaThread17thread_main_innerEv
_ZL12thread_entryP10JavaThreadP6Thread
_ZN9JavaCalls12call_virtualEP9JavaValue6Handle11KlassHandleP6SymbolS5_P6Thread
_ZN9JavaCalls12call_virtualEP9JavaValue11KlassHandleP6SymbolS4_P17JavaCallArgumentsP6Thread
_ZN9JavaCalls11call_helperEP9JavaValueP12methodHandleP17JavaCallArgumentsP6Thread
call_stub
Interpreter
|
--46.66%--Interpreter
|
|--37.54%--java.util.concurrent.ForkJoinPool$WorkQueue.runTask(Ljava/util/concurrent/ForkJoinTask;)V
| |
| |--36.78%--java.util.concurrent.ForkJoinTask.doExec()I
| | |
| | --36.78%--java.util.concurrent.CountedCompleter.exec()Z
| | |
| | |--33.31%--java.util.stream.ForEachOps$ForEachTask.compute()V
| | | |
| | | |--31.64%--java.util.stream.AbstractPipeline.copyInto(Ljava/util/stream/Sink;Ljava/ut
| | | | |
| | | | --31.63%--java.util.Spliterators$ArraySpliterator.forEachRemaining(Ljava/
| | | | |
| | | | |--16.09%--java.util.stream.ForEachOps$ForEachOp$OfRef.accept(L
| | | | | |
| | | | | |--5.73%--org.eclipse.mat.parser.index.IntIndexColle
| | | | | | |
| | | | | | --4.10%--0x6b
| | | | | | |
| | | | | | --4.09%--_ZN13SharedRuntime26
| | | | | | |
| | | | | | --3.92%--_ZN13Obje
--- /home/jkoch/Downloads/linux-aws-4.15.0/tools/perf/util/genelf.c 2018-01-28 21:20:33.000000000 +0000
+++ ./tools/perf/util/genelf.c 2019-10-28 23:34:11.972802970 +0000
@@ -252,6 +252,7 @@
Elf_Data *d;
Elf_Scn *scn;
Elf_Ehdr *ehdr;
+ Elf_Phdr *phdr;
Elf_Shdr *shdr;
uint64_t eh_frame_base_offset;
char *strsym = NULL;
@@ -287,6 +288,19 @@
ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */
/*
+ * setup program header
+ */
+ phdr = elf_newphdr(e, 1);
+ phdr[0].p_type = PT_LOAD;
+ phdr[0].p_offset = 0;
+ phdr[0].p_vaddr = 0;
+ phdr[0].p_paddr = 0;
+ phdr[0].p_filesz = csize;
+ phdr[0].p_memsz = csize;
+ phdr[0].p_flags = PF_X | PF_R;
+ phdr[0].p_align = 8;
+
+ /*
* setup text section
*/
scn = elf_newscn(e);
diff -u -r /home/jkoch/Downloads/linux-aws-4.15.0/tools/perf/util/genelf.h ./tools/perf/util/genelf.h
--- /home/jkoch/Downloads/linux-aws-4.15.0/tools/perf/util/genelf.h 2018-01-28 21:20:33.000000000 +0000
+++ ./tools/perf/util/genelf.h 2019-10-24 23:01:51.330612160 +0000
@@ -41,8 +41,10 @@
#if GEN_ELF_CLASS == ELFCLASS64
#define elf_newehdr elf64_newehdr
+#define elf_newphdr elf64_newphdr
#define elf_getshdr elf64_getshdr
#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
#define Elf_Shdr Elf64_Shdr
#define Elf_Sym Elf64_Sym
#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
@@ -50,8 +52,10 @@
#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a)
#else
#define elf_newehdr elf32_newehdr
+#define elf_newphdr elf32_newphdr
#define elf_getshdr elf32_getshdr
#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Phdr Elf32_Phdr
#define Elf_Shdr Elf32_Shdr
#define Elf_Sym Elf32_Sym
#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
diff --git a/app/perfelfmap.cpp b/app/perfelfmap.cpp
index 54e9e1e..db183c0 100644
--- a/app/perfelfmap.cpp
+++ b/app/perfelfmap.cpp
@@ -98,6 +98,12 @@ void PerfElfMap::registerElf(quint64 addr, quint64 len, quint64 pgoff,
continue;
}
+ if ((strcmp(originalPath, "//anon") == 0) && (strcmp(i->originalPath, "//anon") != 0)) {
+ // anonymous mapped sections that overlap an existing non-anonymous region should be
+ // ignored as they will overmap a probably good jitdump entry
+ return;
+ }
+
// Newly added elf overwrites existing one. Mark the existing one as overwritten and
// reinsert any fragments of it that remain.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment