Skip to content

Instantly share code, notes, and snippets.

@rednaxelafx
Created November 20, 2011 16:15
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rednaxelafx/1380416 to your computer and use it in GitHub Desktop.
Save rednaxelafx/1380416 to your computer and use it in GitHub Desktop.
JNI native wrapper "unlikely" optimization notes. Assembly code printed from PrintNativeNMethods

A C2-compiled method entry on Linux/x64 with compressed oops:

Current HotSpot 23.0-b05: (32 bytes)

  # {method} 'advance' '()V' in 'sun/reflect/generics/parser/SignatureParser'
  #           [sp+0x20]  (sp of caller)
  0x00: mov    0x8(%rsi),%r10d
  0x04: shl    $0x3,%r10
  0x08: cmp    %r10,%rax
  0x0b: jne    IC_miss_stub  ;   {runtime_call}
  0x11: xchg   %ax,%ax
  0x14: nopl   0x0(%rax,%rax,1)
  0x1c: xchg   %ax,%ax
[Verified Entry Point]
  0x20: push   %rbp

Apparently this is a 3-byte and a 12-byte nop sequence.


another example

nops_cnt=3 // normal
3: 0x66 0x66 0x90
12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
  # this:     rsi:rsi   = 'com/sun/beans/TypeResolver'
  # parm0:    rdx:rdx   = '[Ljava/lang/reflect/Type;'
  #           [sp+0x90]  (sp of caller)
  0x00002aaaab55bb20: mov    0x8(%rsi),%r10d
  0x00002aaaab55bb24: shl    $0x3,%r10
  0x00002aaaab55bb28: cmp    %r10,%rax
  0x00002aaaab55bb2b: jne    0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab55bb31: xchg   %ax,%ax
  0x00002aaaab55bb34: nopl   0x0(%rax,%rax,1)
  0x00002aaaab55bb3c: xchg   %ax,%ax
[Verified Entry Point]
  0x00002aaaab55bb40: mov    %eax,-0x6000(%rsp)

(gdb) x/40xb 0x00002aaaab55bb20
0x2aaaab55bb20: 0x44    0x8b    0x56    0x08    0x49    0xc1    0xe2    0x03
0x2aaaab55bb28: 0x49    0x3b    0xc2    0x0f    0x85    0x6f    0xcd    0xe0
0x2aaaab55bb30: 0xff    0x66    0x66    0x90    0x0f    0x1f    0x84    0x00
0x2aaaab55bb38: 0x00    0x00    0x00    0x00    0x66    0x66    0x66    0x90
0x2aaaab55bb40: 0x89    0x84    0x24    0x00    0xa0    0xff    0xff    0x55

nops_cnt=7
7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
  # {method} 'resolve' '([Ljava/lang/reflect/Type;)[Ljava/lang/reflect/Type;' in 'com/sun/beans/TypeResolver'
  # this:     rsi:rsi   = 'com/sun/beans/TypeResolver'
  # parm0:    rdx:rdx   = '[Ljava/lang/reflect/Type;'
  #           [sp+0x90]  (sp of caller)
  0x00002aaaab55aee0: mov    0x8(%rsi),%r10d
  0x00002aaaab55aee4: shl    $0x3,%r10
  0x00002aaaab55aee8: cmp    %r10,%rax
  0x00002aaaab55aeeb: jne    0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab55aef1: nopl   0x0(%rax)
  0x00002aaaab55aef8: nopl   0x0(%rax,%rax,1)
[Verified Entry Point]
  0x00002aaaab55af00: mov    %eax,-0x6000(%rsp)

(gdb) x/40xb 0x00002aaaab55aee0
0x2aaaab55aee0: 0x44    0x8b    0x56    0x08    0x49    0xc1    0xe2    0x03
0x2aaaab55aee8: 0x49    0x3b    0xc2    0x0f    0x85    0xaf    0xd9    0xe0
0x2aaaab55aef0: 0xff    0x0f    0x1f    0x80    0x00    0x00    0x00    0x00
0x2aaaab55aef8: 0x0f    0x1f    0x84    0x00    0x00    0x00    0x00    0x00
0x2aaaab55af00: 0x89    0x84    0x24    0x00    0xa0    0xff    0xff    0x55

Vladimir Kozlov pointed out here that nop and xchg ax, ax are actually the same instruction. So hsdis wasn't really misinterpreting anything, it's just preferring the longer name...

My original (mistaken) reading was that hsdis is misinterpreting the nop sequence, and that it should be:

$ echo '66 66 90 0F 1F 84 00 00 00 00 00 66 66 66 90' | ./udcli -att -x -64
0000000000000000 666690           o16 nop                 
0000000000000003 0f1f840000000000 nop 0x0(%rax,%rax)      
000000000000000b 66666690         o16 nop                 

This comes from x86_64.ad

// Emits the unverified entry point check: compares rax with the klass of the
// object in j_rarg0 and jumps to the IC miss stub when they differ. Afterwards
// it appends up to 3 bytes of nops so that what follows is 4-byte aligned
// relative to where this node started emitting.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  MacroAssembler masm(&cbuf);
  const uint start_size = cbuf.insts_size();

  if (!UseCompressedOops) {
    // Compare directly against the klass field in memory.
    masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
  } else {
    // Load the klass into a scratch register first, then compare.
    masm.load_klass(rscratch1, j_rarg0);
    masm.cmpptr(rax, rscratch1);
  }

  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  /* WARNING: the nops emitted below are critical. They keep the verified
     entry point 4-byte aligned so that it can be patched by
     NativeJump::patch_verified_entry(). */
  const uint emitted = cbuf.insts_size() - start_size;
  int pad = 4 - (emitted & 0x3);
  if (OptoBreakpoint) {
    // Reserve one byte for the int3.
    --pad;
  }
  pad &= 0x3; // Already aligned => no nops at all.
  if (pad != 0) {
    masm.nop(pad);
  }
}

This piece of code can generate a nop sequence of at most 3 bytes. The 12-byte nop sequence comes from the basic block's code alignment.

// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
// assign a different value for C2 without touching a number of files. Use
// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
// c1 doesn't have this problem because the fix to 4858033 assures us
// that the vep is aligned at CodeEntryAlignment whereas c2 only aligns
// the uep and the vep doesn't get real alignment but just slops on by
// only assured that the entry instruction meets the 5 byte size requirement.
#ifdef COMPILER2
define_pd_global(intx, CodeEntryAlignment,       32);
#else
define_pd_global(intx, CodeEntryAlignment,       16);
#endif // COMPILER2
define_pd_global(intx, OptoLoopAlignment,        16);

// InteriorEntryAlignment defaults to 16 on AMD64 builds and to 4 otherwise.
#ifdef AMD64
define_pd_global(intx, InteriorEntryAlignment,       16);
#else
define_pd_global(intx, InteriorEntryAlignment,       4);
#endif
  // Report the alignment required by this block.  Must be a power of 2.
  // The previous block will insert nops to get this alignment.
  // Returns CodeEntryAlignment for the root block, InteriorEntryAlignment
  // for the start block, loop_alignment() for blocks with loop alignment,
  // and relocInfo::addr_unit() (no particular alignment) otherwise.
  uint code_alignment();

// Returns the code alignment this block asks for, chosen by block kind:
// root block, start block, loop-aligned block, or none of these.
uint Block::code_alignment() {
  uint alignment;
  if (_pre_order == 0) {
    // Root block
    alignment = CodeEntryAlignment;
  } else if (_pre_order == 1) {
    // Start block
    alignment = InteriorEntryAlignment;
  } else if (has_loop_alignment()) {
    // Block carrying a loop alignment
    alignment = loop_alignment();
  } else {
    // No particular alignment
    alignment = relocInfo::addr_unit();
  }
  return alignment;
}

Block::alignment_padding()

  // Returns how many bytes of padding must precede this block when code is
  // currently at current_offset, or 0 if the block needs none.
  // A block with loop alignment is left unaligned when the padding would
  // exceed MaxLoopPad and the loop's first instructions fit within that
  // padding (first_inst_size() <= padding).
  uint alignment_padding(int current_offset) {
    const int alignment = code_alignment();
    const int mask = alignment - relocInfo::addr_unit();
    if (mask <= 0) {
      return 0;  // block has no alignment requirement
    }
    assert(is_power_of_2(mask+relocInfo::addr_unit()), "");

    const int misalignment = current_offset & mask;
    if (misalignment == 0) {
      return 0;  // already aligned
    }

    const uint pad = (alignment - misalignment) & mask;
    const bool skip_loop_pad = has_loop_alignment() &&
                               pad > (uint)MaxLoopPad &&
                               first_inst_size() <= pad;
    return skip_loop_pad ? 0 : pad;
  }

Compile::Output()

  Block *entry = _cfg->_blocks[1];
  Block *broot = _cfg->_broot;

  const StartNode *start = entry->_nodes[0]->as_Start();

  // Replace StartNode with prolog
  MachPrologNode *prolog = new (this) MachPrologNode();
  entry->_nodes.map( 0, prolog );
  bbs.map( prolog->_idx, entry );
  bbs.map( start->_idx, NULL ); // start is no longer in any block

  // Virtual methods need an unverified entry point

  if( is_osr_compilation() ) {
    if( PoisonOSREntry ) {
      // TODO: Should use a ShouldNotReachHereNode...
      _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
    }
  } else {
    if( _method && !_method->flags().is_static() ) {
      // Insert unvalidated entry point
      _cfg->insert( broot, 0, new (this) MachUEPNode() );
    }
  }

Compile::fill_buffer

// ...
    // If the next block is the top of a loop, pad this block out to align
    // the loop top a little. Helps prevent pipe stalls at loop back branches.
    if (i < nblocks-1) {
      Block *nb = _cfg->_blocks[i+1];
      int padding = nb->alignment_padding(current_offset);
      if( padding > 0 ) {
        MachNode *nop = new (this) MachNopNode(padding / nop_size);
        b->_nodes.insert( b->_nodes.size(), nop );
        _cfg->_bbs.map( nop->_idx, b );
        nop->emit(*cb, _regalloc);
        current_offset = cb->insts_size();
      }
#ifdef ASSERT
      int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
      block_alignment_padding = (max_loop_pad - padding);
      assert(block_alignment_padding >= 0, "sanity");
#endif
    }
// ...

This padding logic pads not only for loops, but also for the VEPs.


BTW, the push %rbp (0x55) code comes from an emit_opcode(cbuf, 0x50 | RBP_enc); in MachPrologNode::emit.

About the unverified entry point code sequence generated for native wrappers: On x64 with compressed oops, moving the IC miss jump out-of-line doesn't make the code sequence shorter, so it's probably better to leave it as-is.

Current HotSpot 23.0-b05: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00002aaaab5496a0: mov    0x8(%rsi),%r10d
  0x00002aaaab5496a4: shl    $0x3,%r10
  0x00002aaaab5496a8: cmp    %r10,%rax
  0x00002aaaab5496ab: je     0x00002aaaab5496b6
  0x00002aaaab5496b1: jmpq   0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab5496b6: xchg   %ax,%ax
[Verified Entry Point]
  0x00002aaaab5496b8: mov    %eax,-0x6000(%rsp)

Patch version 1: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00002aaaab5412a0: mov    0x8(%rsi),%r10d
  0x00002aaaab5412a4: shl    $0x3,%r10
  0x00002aaaab5412a8: cmp    %r10,%rax
  0x00002aaaab5412ab: jne    0x00002aaaab541482
  0x00002aaaab5412b1: nopl   0x0(%rax)
[Verified Entry Point]
  0x00002aaaab5412b8: mov    %eax,-0x6000(%rsp)

Patch version 2: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00002aaaab548ce0: mov    0x8(%rsi),%r10d
  0x00002aaaab548ce4: shl    $0x3,%r10
  0x00002aaaab548ce8: cmp    %r10,%rax
  0x00002aaaab548ceb: jne    0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab548cf1: nopl   0x0(%rax)
[Verified Entry Point]
  0x00002aaaab548cf8: mov    %eax,-0x6000(%rsp)

Current: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00: mov    0x8(%rsi),%r10d
  0x04: shl    $0x3,%r10
  0x08: cmp    %r10,%rax
  0x0b: je     0x16
  0x11: jmpq   IC_miss_stub  ;   {runtime_call}
  0x16: xchg   %ax,%ax
[Verified Entry Point]
  0x18: mov    %eax,-0x6000(%rsp)

Patch version 1: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00: mov    0x8(%rsi),%r10d
  0x04: shl    $0x3,%r10
  0x08: cmp    %r10,%rax
  0x0b: jne    Label_IC_miss
  0x11: nopl   0x0(%rax)
[Verified Entry Point]
  0x18: mov    %eax,-0x6000(%rsp)

Patch version 2: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00: mov    0x8(%rsi),%r10d
  0x04: shl    $0x3,%r10
  0x08: cmp    %r10,%rax
  0x0b: jne    IC_miss_stub  ;   {runtime_call}
  0x11: nopl   0x0(%rax)
[Verified Entry Point]
  0x18: mov    %eax,-0x6000(%rsp)

==========================

On x64 without compressed oops, moving the jump out-of-line does make the code sequence a bit shorter, though:

Current HotSpot 23.0-b05: (24 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00002aaaab547000: mov    0x8(%rsi),%r10
  0x00002aaaab547004: cmp    %r10,%rax
  0x00002aaaab547007: je     0x00002aaaab547012
  0x00002aaaab54700d: jmpq   0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab547012: nopw   0x0(%rax,%rax,1)
[Verified Entry Point]
  0x00002aaaab547018: mov    %eax,-0x6000(%rsp)

Patch version 2: (16 bytes)

[Entry Point]
  # {method} 'list' '(Ljava/io/File;)[Ljava/lang/String;' in 'java/io/UnixFileSystem'
  # this:     rsi:rsi   = 'java/io/UnixFileSystem'
  # parm0:    rdx:rdx   = 'java/io/File'
  #           [sp+0x50]  (sp of caller)
  0x00002aaaab52ef80: mov    0x8(%rsi),%r10
  0x00002aaaab52ef84: cmp    %r10,%rax
  0x00002aaaab52ef87: jne    0x00002aaaab3688a0  ;   {runtime_call}
  0x00002aaaab52ef8d: xchg   %ax,%ax
[Verified Entry Point]
  0x00002aaaab52ef90: mov    %eax,-0x6000(%rsp)
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_32.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Sun Nov 20 23:02:50 2011 +0800
@@ -1227,23 +1227,19 @@
const Register ic_reg = rax;
const Register receiver = rcx;
- Label hit;
+
+ Label ic_miss;
Label exception_pending;
-
__ verify_oop(receiver);
__ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
- __ jcc(Assembler::equal, hit);
-
- __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ jcc(Assembler::notEqual, ic_miss);
// verified entry must be aligned for code patching.
// and the first 5 bytes must be in the same cache line
// if we align at 8 then we will be sure 5 bytes are in the same line
__ align(8);
- __ bind(hit);
-
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
@@ -1592,33 +1588,15 @@
}
// check for safepoint operation in progress and/or pending suspend requests
- { Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ push(thread);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
- JavaThread::check_special_condition_for_native_trans)));
- __ increment(rsp, wordSize);
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
-
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1747,6 +1725,30 @@
// Unexpected paths are out of line and go here
+ // Handle IC miss
+ __ bind(ic_miss);
+ __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ push(thread);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+ JavaThread::check_special_condition_for_native_trans)));
+ __ increment(rsp, wordSize);
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
+
// Slow path locking & unlocking
if (method->is_synchronized()) {
@@ -1829,7 +1831,7 @@
// BEGIN EXCEPTION PROCESSING
- // Forward the exception
+ // Forward the exception
__ bind(exception_pending);
// remove possible return value from FPU register stack
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Sun Nov 20 23:02:50 2011 +0800
@@ -1283,18 +1283,14 @@
const Register ic_reg = rax;
const Register receiver = j_rarg0;
- Label ok;
+ Label ic_miss;
Label exception_pending;
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
__ load_klass(rscratch1, receiver);
__ cmpq(ic_reg, rscratch1);
- __ jcc(Assembler::equal, ok);
-
- __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
-
- __ bind(ok);
+ __ jcc(Assembler::notEqual, ic_miss);
// Verified entry point must be aligned
__ align(8);
@@ -1636,36 +1632,15 @@
// check for safepoint operation in progress and/or pending suspend requests
- {
- Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ mov(c_rarg0, r15_thread);
- __ mov(r12, rsp); // remember sp
- __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
- __ andptr(rsp, -16); // align stack as required by ABI
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- __ mov(rsp, r12); // restore sp
- __ reinit_heapbase();
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1763,13 +1738,37 @@
__ ret(0);
// Unexpected paths are out of line and go here
+
+ // Handle IC miss
+ __ bind(ic_miss);
+ __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
- // forward the exception
+ // Forward the exception
__ bind(exception_pending);
-
- // and forward the exception
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ mov(c_rarg0, r15_thread);
+ __ mov(r12, rsp); // remember sp
+ __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+ __ andptr(rsp, -16); // align stack as required by ABI
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+ __ mov(rsp, r12); // restore sp
+ __ reinit_heapbase();
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
// Slow path locking & unlocking
if (method->is_synchronized()) {
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_32.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Nov 21 00:14:06 2011 +0800
@@ -1227,23 +1227,18 @@
const Register ic_reg = rax;
const Register receiver = rcx;
- Label hit;
+
Label exception_pending;
-
__ verify_oop(receiver);
__ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
- __ jcc(Assembler::equal, hit);
-
- __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+ __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
// verified entry must be aligned for code patching.
// and the first 5 bytes must be in the same cache line
// if we align at 8 then we will be sure 5 bytes are in the same line
__ align(8);
- __ bind(hit);
-
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
@@ -1592,33 +1587,15 @@
}
// check for safepoint operation in progress and/or pending suspend requests
- { Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ push(thread);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
- JavaThread::check_special_condition_for_native_trans)));
- __ increment(rsp, wordSize);
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
-
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1747,6 +1724,26 @@
// Unexpected paths are out of line and go here
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ push(thread);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+ JavaThread::check_special_condition_for_native_trans)));
+ __ increment(rsp, wordSize);
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
+
// Slow path locking & unlocking
if (method->is_synchronized()) {
@@ -1829,7 +1826,7 @@
// BEGIN EXCEPTION PROCESSING
- // Forward the exception
+ // Forward the exception
__ bind(exception_pending);
// remove possible return value from FPU register stack
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Nov 21 00:14:06 2011 +0800
@@ -1283,18 +1283,13 @@
const Register ic_reg = rax;
const Register receiver = j_rarg0;
- Label ok;
Label exception_pending;
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
__ load_klass(rscratch1, receiver);
__ cmpq(ic_reg, rscratch1);
- __ jcc(Assembler::equal, ok);
-
- __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
-
- __ bind(ok);
+ __ jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
// Verified entry point must be aligned
__ align(8);
@@ -1636,36 +1631,15 @@
// check for safepoint operation in progress and/or pending suspend requests
- {
- Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ mov(c_rarg0, r15_thread);
- __ mov(r12, rsp); // remember sp
- __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
- __ andptr(rsp, -16); // align stack as required by ABI
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- __ mov(rsp, r12); // restore sp
- __ reinit_heapbase();
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1764,12 +1738,32 @@
// Unexpected paths are out of line and go here
- // forward the exception
+ // Forward the exception
__ bind(exception_pending);
-
- // and forward the exception
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ mov(c_rarg0, r15_thread);
+ __ mov(r12, rsp); // remember sp
+ __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+ __ andptr(rsp, -16); // align stack as required by ABI
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+ __ mov(rsp, r12); // restore sp
+ __ reinit_heapbase();
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
// Slow path locking & unlocking
if (method->is_synchronized()) {
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_32.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Nov 21 00:27:16 2011 +0800
@@ -1592,33 +1592,15 @@
}
// check for safepoint operation in progress and/or pending suspend requests
- { Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ push(thread);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
- JavaThread::check_special_condition_for_native_trans)));
- __ increment(rsp, wordSize);
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
-
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1747,6 +1729,26 @@
// Unexpected paths are out of line and go here
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ push(thread);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+ JavaThread::check_special_condition_for_native_trans)));
+ __ increment(rsp, wordSize);
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
+
// Slow path locking & unlocking
if (method->is_synchronized()) {
@@ -1829,7 +1831,7 @@
// BEGIN EXCEPTION PROCESSING
- // Forward the exception
+ // Forward the exception
__ bind(exception_pending);
// remove possible return value from FPU register stack
diff -r 883328bfc472 src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Nov 17 10:45:53 2011 -0800
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Nov 21 00:27:16 2011 +0800
@@ -1283,21 +1283,21 @@
const Register ic_reg = rax;
const Register receiver = j_rarg0;
- Label ok;
+ Label ic_hit;
Label exception_pending;
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
__ load_klass(rscratch1, receiver);
__ cmpq(ic_reg, rscratch1);
- __ jcc(Assembler::equal, ok);
+ __ jcc(Assembler::equal, ic_hit);
__ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
- __ bind(ok);
-
// Verified entry point must be aligned
__ align(8);
+
+ __ bind(ic_hit);
int vep_offset = ((intptr_t)__ pc()) - start;
@@ -1636,36 +1636,15 @@
// check for safepoint operation in progress and/or pending suspend requests
- {
- Label Continue;
+ Label change_thread_state;
+ Label check_native_trans;
+ __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
+ SafepointSynchronize::_not_synchronized);
+ __ jcc(Assembler::notEqual, check_native_trans);
+ __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::notEqual, check_native_trans);
- __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
- SafepointSynchronize::_not_synchronized);
-
- Label L;
- __ jcc(Assembler::notEqual, L);
- __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
- __ jcc(Assembler::equal, Continue);
- __ bind(L);
-
- // Don't use call_VM as it will see a possible pending exception and forward it
- // and never return here preventing us from clearing _last_native_pc down below.
- // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
- // preserved and correspond to the bcp/locals pointers. So we do a runtime call
- // by hand.
- //
- save_native_result(masm, ret_type, stack_slots);
- __ mov(c_rarg0, r15_thread);
- __ mov(r12, rsp); // remember sp
- __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
- __ andptr(rsp, -16); // align stack as required by ABI
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- __ mov(rsp, r12); // restore sp
- __ reinit_heapbase();
- // Restore any method result value
- restore_native_result(masm, ret_type, stack_slots);
- __ bind(Continue);
- }
+ __ bind(change_thread_state);
// change thread state
__ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
@@ -1764,12 +1743,32 @@
// Unexpected paths are out of line and go here
- // forward the exception
+ // Forward the exception
__ bind(exception_pending);
-
- // and forward the exception
__ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+ // SLOW PATH Check special condition for native transition
+ {
+ __ bind(check_native_trans);
+
+ // Don't use call_VM as it will see a possible pending exception and forward it
+ // and never return here preventing us from clearing _last_native_pc down below.
+ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
+ // preserved and correspond to the bcp/locals pointers. So we do a runtime call
+ // by hand.
+ //
+ save_native_result(masm, ret_type, stack_slots);
+ __ mov(c_rarg0, r15_thread);
+ __ mov(r12, rsp); // remember sp
+ __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
+ __ andptr(rsp, -16); // align stack as required by ABI
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+ __ mov(rsp, r12); // restore sp
+ __ reinit_heapbase();
+ // Restore any method result value
+ restore_native_result(masm, ret_type, stack_slots);
+ __ jmp(change_thread_state);
+ }
// Slow path locking & unlocking
if (method->is_synchronized()) {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment