A C2-compiled method entry on Linux/x64 with compressed oops:
Current HotSpot 23.0-b05: (32 bytes)
# {method} 'advance' '()V' in 'sun/reflect/generics/parser/SignatureParser'
# [sp+0x20] (sp of caller)
0x00: mov 0x8(%rsi),%r10d
0x04: shl $0x3,%r10
0x08: cmp %r10,%rax
0x0b: jne IC_miss_stub ; {runtime_call}
0x11: xchg %ax,%ax
0x14: nopl 0x0(%rax,%rax,1)
0x1c: xchg %ax,%ax
[Verified Entry Point]
0x20: push %rbp
Apparently the padding before the verified entry point consists of a 3-byte nop sequence followed by a 12-byte nop sequence.
Another example:
nops_cnt=3 // normal
3: 0x66 0x66 0x90
12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
# this: rsi:rsi = 'com/sun/beans/TypeResolver'
# parm0: rdx:rdx = '[Ljava/lang/reflect/Type;'
# [sp+0x90] (sp of caller)
0x00002aaaab55bb20: mov 0x8(%rsi),%r10d
0x00002aaaab55bb24: shl $0x3,%r10
0x00002aaaab55bb28: cmp %r10,%rax
0x00002aaaab55bb2b: jne 0x00002aaaab3688a0 ; {runtime_call}
0x00002aaaab55bb31: xchg %ax,%ax
0x00002aaaab55bb34: nopl 0x0(%rax,%rax,1)
0x00002aaaab55bb3c: xchg %ax,%ax
[Verified Entry Point]
0x00002aaaab55bb40: mov %eax,-0x6000(%rsp)
(gdb) x/40xb 0x00002aaaab55bb20
0x2aaaab55bb20: 0x44 0x8b 0x56 0x08 0x49 0xc1 0xe2 0x03
0x2aaaab55bb28: 0x49 0x3b 0xc2 0x0f 0x85 0x6f 0xcd 0xe0
0x2aaaab55bb30: 0xff 0x66 0x66 0x90 0x0f 0x1f 0x84 0x00
0x2aaaab55bb38: 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
0x2aaaab55bb40: 0x89 0x84 0x24 0x00 0xa0 0xff 0xff 0x55
nops_cnt=7
7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
# {method} 'resolve' '([Ljava/lang/reflect/Type;)[Ljava/lang/reflect/Type;' in 'com/sun/beans/TypeResolver'
# this: rsi:rsi = 'com/sun/beans/TypeResolver'
# parm0: rdx:rdx = '[Ljava/lang/reflect/Type;'
# [sp+0x90] (sp of caller)
0x00002aaaab55aee0: mov 0x8(%rsi),%r10d
0x00002aaaab55aee4: shl $0x3,%r10
0x00002aaaab55aee8: cmp %r10,%rax
0x00002aaaab55aeeb: jne 0x00002aaaab3688a0 ; {runtime_call}
0x00002aaaab55aef1: nopl 0x0(%rax)
0x00002aaaab55aef8: nopl 0x0(%rax,%rax,1)
[Verified Entry Point]
0x00002aaaab55af00: mov %eax,-0x6000(%rsp)
(gdb) x/40xb 0x00002aaaab55aee0
0x2aaaab55aee0: 0x44 0x8b 0x56 0x08 0x49 0xc1 0xe2 0x03
0x2aaaab55aee8: 0x49 0x3b 0xc2 0x0f 0x85 0xaf 0xd9 0xe0
0x2aaaab55aef0: 0xff 0x0f 0x1f 0x80 0x00 0x00 0x00 0x00
0x2aaaab55aef8: 0x0f 0x1f 0x84 0x00 0x00 0x00 0x00 0x00
0x2aaaab55af00: 0x89 0x84 0x24 0x00 0xa0 0xff 0xff 0x55
Update: Vladimir Kozlov pointed out that nop
and xchg ax, ax
are actually the same instruction. So hsdis wasn't really misinterpreting anything; it just prefers the longer name.
My original (since corrected) note claimed that hsdis was misinterpreting the nop sequence, and that it should be decoded as:
$ echo '66 66 90 0F 1F 84 00 00 00 00 00 66 66 66 90' | ./udcli -att -x -64
0000000000000000 666690 o16 nop
0000000000000003 0f1f840000000000 nop 0x0(%rax,%rax)
000000000000000b 66666690 o16 nop
This comes from x86_64.ad
// Emits the unverified entry point check: compares the klass of the object in
// j_rarg0 with the value in rax, jumps to the IC miss stub when they differ,
// and then pads the code with 0 to 3 nops.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
MacroAssembler masm(&cbuf);
// Buffer size on entry; used below to measure how many bytes this node emitted.
uint insts_size = cbuf.insts_size();
if (UseCompressedOops) {
// Load the klass into rscratch1 via load_klass, then compare it with rax.
masm.load_klass(rscratch1, j_rarg0);
masm.cmpptr(rax, rscratch1);
} else {
// Compare rax directly against the klass field of the object in j_rarg0.
masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
}
// On mismatch, leave through the IC miss stub.
masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
/* WARNING these NOPs are critical so that verified entry point is properly
4 bytes aligned for patching by NativeJump::patch_verified_entry() */
// Count needed to bring the bytes emitted by this node up to a multiple of 4.
int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
if (OptoBreakpoint) {
// Leave space for int3
nops_cnt -= 1;
}
// Reduce modulo 4, so the final count is always in the range 0..3.
nops_cnt &= 0x3; // Do not add nops if code is aligned.
if (nops_cnt > 0)
masm.nop(nops_cnt);
}
This piece of code can generate at most a 3-byte nop sequence. The 12-byte nop sequence comes from the code alignment of the next basic block.
// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
// assign a different value for C2 without touching a number of files. Use
// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
// c1 doesn't have this problem because the fix to 4858033 assures us
// that the vep is aligned at CodeEntryAlignment whereas c2 only aligns
// the uep and the vep doesn't get real alignment but just slops on by
// only assured that the entry instruction meets the 5 byte size requirement.
// CodeEntryAlignment: 32 when built with COMPILER2, 16 otherwise.
#ifdef COMPILER2
define_pd_global(intx, CodeEntryAlignment, 32);
#else
define_pd_global(intx, CodeEntryAlignment, 16);
#endif // COMPILER2
define_pd_global(intx, OptoLoopAlignment, 16);
// InteriorEntryAlignment: 16 on AMD64, 4 otherwise.
#ifdef AMD64
define_pd_global(intx, InteriorEntryAlignment, 16);
#else
define_pd_global(intx, InteriorEntryAlignment, 4);
#endif
// Report the alignment required by this block. Must be a power of 2.
// The previous block will insert nops to get this alignment.
uint code_alignment();
// Returns the alignment this block asks for, chosen by the kind of block:
//   - Root block  (_pre_order == 0): CodeEntryAlignment
//   - Start block (_pre_order == 1): InteriorEntryAlignment
//   - block with loop alignment:     loop_alignment()
//   - anything else:                 relocInfo::addr_unit(), i.e. no
//                                    particular alignment
uint Block::code_alignment() {
  uint required;
  if (_pre_order == 0) {
    // Root block
    required = CodeEntryAlignment;
  } else if (_pre_order == 1) {
    // Start block
    required = InteriorEntryAlignment;
  } else if (has_loop_alignment()) {
    // Block that carries a loop alignment
    required = loop_alignment();
  } else {
    // No particular alignment
    required = relocInfo::addr_unit();
  }
  return required;
}
Block::alignment_padding()
// Returns the number of padding bytes needed in front of this block so that
// it starts at its code_alignment(), given that code emission is currently
// at current_offset. Returns 0 when the block has no alignment requirement
// or current_offset is already aligned.
//
// A block with loop alignment is left unpadded (0 is returned) when the
// padding would exceed MaxLoopPad and the first instructions of the loop are
// no larger than that padding.
uint alignment_padding(int current_offset) {
  const int alignment = code_alignment();
  const int pad_mask = alignment - relocInfo::addr_unit();
  if (pad_mask <= 0) {
    // No alignment beyond the basic address unit is requested.
    return 0;
  }
  assert(is_power_of_2(pad_mask + relocInfo::addr_unit()), "");

  const int misalignment = current_offset & pad_mask;
  if (misalignment == 0) {
    // Already aligned.
    return 0;
  }

  const uint padding = (alignment - misalignment) & pad_mask;
  const bool skip_loop_padding = has_loop_alignment() &&
                                 padding > (uint)MaxLoopPad &&
                                 first_inst_size() <= padding;
  return skip_loop_padding ? 0 : padding;
}
Compile::Output()
// Excerpt: swaps the StartNode for a prolog node and, for non-OSR
// compilations of non-static methods, inserts the unverified entry point
// node at the front of the root block.
Block *entry = _cfg->_blocks[1];
Block *broot = _cfg->_broot;
const StartNode *start = entry->_nodes[0]->as_Start();
// Replace StartNode with prolog
MachPrologNode *prolog = new (this) MachPrologNode();
entry->_nodes.map( 0, prolog );
bbs.map( prolog->_idx, entry );
bbs.map( start->_idx, NULL ); // start is no longer in any block
// Virtual methods need an unverified entry point
if( is_osr_compilation() ) {
// OSR compilations get no UEP; optionally a breakpoint node is placed in the root block.
if( PoisonOSREntry ) {
// TODO: Should use a ShouldNotReachHereNode...
_cfg->insert( broot, 0, new (this) MachBreakpointNode() );
}
} else {
// Only non-static methods of a real method (_method non-null) get a UEP.
if( _method && !_method->flags().is_static() ) {
// Insert unvalidated entry point
_cfg->insert( broot, 0, new (this) MachUEPNode() );
}
}
Compile::fill_buffer
// ...
// If the next block is the top of a loop, pad this block out to align
// the loop top a little. Helps prevent pipe stalls at loop back branches.
// Note: the padding is requested from the next block's alignment_padding(),
// so any following block that reports a nonzero padding gets nops here,
// not only loop tops.
if (i < nblocks-1) {
Block *nb = _cfg->_blocks[i+1];
int padding = nb->alignment_padding(current_offset);
if( padding > 0 ) {
// Append a nop node to the end of the current block and emit it right away.
MachNode *nop = new (this) MachNopNode(padding / nop_size);
b->_nodes.insert( b->_nodes.size(), nop );
_cfg->_bbs.map( nop->_idx, b );
nop->emit(*cb, _regalloc);
// Re-read the offset from the code buffer after emitting the nops.
current_offset = cb->insts_size();
}
#ifdef ASSERT
// Debug-only: record how much of the maximum possible padding was not used.
int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
block_alignment_padding = (max_loop_pad - padding);
assert(block_alignment_padding >= 0, "sanity");
#endif
}
// ...
This padding logic pads not only for loops, but also for the VEPs.
BTW, the push %rbp
(0x55) code comes from an emit_opcode(cbuf, 0x50 | RBP_enc);
call in MachPrologNode::emit.