Skip to content

Instantly share code, notes, and snippets.

@Glavo
Created March 18, 2021 20:44
Show Gist options
  • Save Glavo/73a83a744e4827ce2b6a98df690fec75 to your computer and use it in GitHub Desktop.
Save Glavo/73a83a744e4827ce2b6a98df690fec75 to your computer and use it in GitHub Desktop.
DfsTest
; ModuleID = 'test2.cpp'
source_filename = "test2.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@_ZL3cnt = internal unnamed_addr global i32 0, align 4
@.str = private unnamed_addr constant [16 x i8] c"%d\0ATime: %ldms\0A\00", align 1
; Function Attrs: nofree nounwind uwtable
define dso_local void @_Z3Dfsiiii(i32 %0, i32 %1, i32 %2, i32 %3) local_unnamed_addr #0 {
%5 = or i32 %2, %1
%6 = or i32 %5, %3
%7 = and i32 %6, 32767
%8 = xor i32 %7, 32767
%9 = icmp eq i32 %8, 0
br i1 %9, label %34, label %10
10: ; preds = %4
%11 = icmp eq i32 %0, 15
%12 = add nsw i32 %0, 1
br i1 %11, label %13, label %22
13: ; preds = %10
%14 = load i32, i32* @_ZL3cnt, align 4, !tbaa !2
br label %15
15: ; preds = %13, %15
%16 = phi i32 [ %20, %15 ], [ %14, %13 ]
%17 = phi i32 [ %19, %15 ], [ %8, %13 ]
%18 = add i32 %17, -1
%19 = and i32 %17, %18
%20 = add nsw i32 %16, 1
%21 = icmp eq i32 %19, 0
br i1 %21, label %33, label %15
22: ; preds = %10, %22
%23 = phi i32 [ %26, %22 ], [ %8, %10 ]
%24 = sub nsw i32 0, %23
%25 = and i32 %23, %24
%26 = xor i32 %25, %23
%27 = or i32 %25, %1
%28 = or i32 %25, %2
%29 = ashr i32 %28, 1
%30 = or i32 %25, %3
%31 = shl i32 %30, 1
tail call void @_Z3Dfsiiii(i32 %12, i32 %27, i32 %29, i32 %31)
%32 = icmp eq i32 %26, 0
br i1 %32, label %34, label %22
33: ; preds = %15
store i32 %20, i32* @_ZL3cnt, align 4, !tbaa !2
br label %34
34: ; preds = %22, %33, %4
ret void
}
; Function Attrs: norecurse nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #1 {
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
%1 = tail call i64 @clock() #4
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
store i32 0, i32* @_ZL3cnt, align 4, !tbaa !2
tail call void @_Z3Dfsiiii(i32 1, i32 0, i32 0, i32 0)
%2 = tail call i64 @clock() #4
%3 = load i32, i32* @_ZL3cnt, align 4, !tbaa !2
%4 = sub nsw i64 %2, %1
%5 = sdiv i64 %4, 10000
%6 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i64 0, i64 0), i32 %3, i64 %5)
ret i32 0
}
; Function Attrs: nounwind
declare dso_local i64 @clock() local_unnamed_addr #2
; Function Attrs: nofree nounwind
declare dso_local i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr #3
attributes #0 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"Ubuntu clang version 11.0.0-2~ubuntu20.04.1"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
.file "test2.cpp"
.text
.p2align 4
.globl _Z3Dfsiiii
.type _Z3Dfsiiii, @function
_Z3Dfsiiii:
.LFB30:
.cfi_startproc
endbr64
pushq %r15
.cfi_def_cfa_offset 16
.cfi_offset 15, -16
pushq %r14
.cfi_def_cfa_offset 24
.cfi_offset 14, -24
pushq %r13
.cfi_def_cfa_offset 32
.cfi_offset 13, -32
pushq %r12
.cfi_def_cfa_offset 40
.cfi_offset 12, -40
pushq %rbp
.cfi_def_cfa_offset 48
.cfi_offset 6, -48
pushq %rbx
.cfi_def_cfa_offset 56
.cfi_offset 3, -56
movl %edx, %ebx
orl %ecx, %ebx
orl %esi, %ebx
notl %ebx
subq $24, %rsp
.cfi_def_cfa_offset 80
andl $32767, %ebx
je .L1
cmpl $15, %edi
jne .L13
movl _ZL3cnt(%rip), %eax
leal 1(%rax), %edx
.L4:
movl %ebx, %eax
movl %ebx, %ecx
movl %edx, %esi
addl $1, %edx
negl %eax
andl %ebx, %eax
xorl %eax, %ebx
cmpl %ecx, %eax
jne .L4
movl %esi, _ZL3cnt(%rip)
.L1:
addq $24, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 56
popq %rbx
.cfi_def_cfa_offset 48
popq %rbp
.cfi_def_cfa_offset 40
popq %r12
.cfi_def_cfa_offset 32
popq %r13
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %r15
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L13:
.cfi_restore_state
movl %esi, %ebp
movl %edx, %r12d
leal 1(%rdi), %r15d
movl %ecx, %r13d
.L3:
movl %ebx, %eax
movl %r13d, %ecx
movl %r12d, %edx
movl %ebp, %esi
negl %eax
movl %r15d, %edi
movl %ebx, %r14d
andl %ebx, %eax
orl %eax, %ecx
orl %eax, %edx
orl %eax, %esi
movl %eax, 12(%rsp)
addl %ecx, %ecx
sarl %edx
xorl %eax, %ebx
call _Z3Dfsiiii
movl 12(%rsp), %eax
cmpl %eax, %r14d
jne .L3
addq $24, %rsp
.cfi_def_cfa_offset 56
popq %rbx
.cfi_def_cfa_offset 48
popq %rbp
.cfi_def_cfa_offset 40
popq %r12
.cfi_def_cfa_offset 32
popq %r13
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %r15
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE30:
.size _Z3Dfsiiii, .-_Z3Dfsiiii
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%d\nTime: %ldms\n"
.section .text.startup,"ax",@progbits
.p2align 4
.globl main
.type main, @function
main:
.LFB31:
.cfi_startproc
endbr64
pushq %r15
.cfi_def_cfa_offset 16
.cfi_offset 15, -16
pushq %r14
.cfi_def_cfa_offset 24
.cfi_offset 14, -24
pushq %r13
.cfi_def_cfa_offset 32
.cfi_offset 13, -32
pushq %r12
.cfi_def_cfa_offset 40
.cfi_offset 12, -40
pushq %rbp
.cfi_def_cfa_offset 48
.cfi_offset 6, -48
movl $10, %ebp
pushq %rbx
.cfi_def_cfa_offset 56
.cfi_offset 3, -56
subq $8, %rsp
.cfi_def_cfa_offset 64
.p2align 4,,10
.p2align 3
.L19:
movl $32767, %r11d
.L15:
movl %r11d, %r9d
movl %r11d, %r12d
negl %r9d
andl %r11d, %r9d
leal (%r9,%r9), %r13d
movl %r9d, %ebx
xorl %r9d, %r11d
sarl %ebx
movl %r13d, %r8d
orl %ebx, %r8d
orl %r9d, %r8d
notl %r8d
andl $32767, %r8d
je .L17
.L18:
movl %r8d, %r10d
movl %r13d, %ecx
movl %ebx, %edx
movl %r9d, %esi
negl %r10d
movl $3, %edi
movl %r8d, %r14d
andl %r8d, %r10d
orl %r10d, %ecx
orl %r10d, %edx
orl %r10d, %esi
xorl %r10d, %r8d
addl %ecx, %ecx
sarl %edx
call _Z3Dfsiiii
cmpl %r14d, %r10d
jne .L18
.L17:
cmpl %r9d, %r12d
jne .L15
subl $1, %ebp
jne .L19
movl $0, _ZL3cnt(%rip)
movl $10, %ebp
call clock@PLT
movq %rax, %rbx
.p2align 4,,10
.p2align 3
.L24:
movl $0, _ZL3cnt(%rip)
movl $32767, %r10d
.L20:
movl %r10d, %r9d
movl %r10d, %r12d
negl %r9d
andl %r10d, %r9d
leal (%r9,%r9), %r13d
movl %r9d, %r11d
xorl %r9d, %r10d
sarl %r11d
movl %r13d, %r8d
orl %r11d, %r8d
orl %r9d, %r8d
notl %r8d
andl $32767, %r8d
je .L22
.L23:
movl %r8d, %r14d
movl %r13d, %ecx
movl %r11d, %edx
movl %r9d, %esi
negl %r14d
movl %r8d, %r15d
movl $3, %edi
andl %r8d, %r14d
orl %r14d, %ecx
orl %r14d, %edx
orl %r14d, %esi
xorl %r14d, %r8d
addl %ecx, %ecx
sarl %edx
call _Z3Dfsiiii
cmpl %r15d, %r14d
jne .L23
.L22:
cmpl %r9d, %r12d
jne .L20
subl $1, %ebp
jne .L24
call clock@PLT
leaq .LC0(%rip), %rsi
movl $1, %edi
movabsq $3777893186295716171, %rdx
subq %rbx, %rax
movq %rax, %rcx
imulq %rdx
xorl %eax, %eax
sarq $63, %rcx
sarq $11, %rdx
subq %rcx, %rdx
movq %rdx, %rcx
movl _ZL3cnt(%rip), %edx
call __printf_chk@PLT
addq $8, %rsp
.cfi_def_cfa_offset 56
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 48
popq %rbp
.cfi_def_cfa_offset 40
popq %r12
.cfi_def_cfa_offset 32
popq %r13
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %r15
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE31:
.size main, .-main
.local _ZL3cnt
.comm _ZL3cnt,4,4
.ident "GCC: (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0"
.section .note.GNU-stack,"",@progbits
.section .note.gnu.property,"a"
.align 8
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4:
============================= C2-compiled nmethod ==============================
----------------------------------- Assembly -----------------------------------
Compiled method (c2) 71 60 4 DfsTest2::Dfs (76 bytes)
total in heap [0x00007f09771e4090,0x00007f09771e4540] = 1200
relocation [0x00007f09771e41e8,0x00007f09771e4208] = 32
main code [0x00007f09771e4220,0x00007f09771e4300] = 224
stub code [0x00007f09771e4300,0x00007f09771e4328] = 40
oops [0x00007f09771e4328,0x00007f09771e4330] = 8
metadata [0x00007f09771e4330,0x00007f09771e4338] = 8
scopes data [0x00007f09771e4338,0x00007f09771e43b0] = 120
scopes pcs [0x00007f09771e43b0,0x00007f09771e4520] = 368
dependencies [0x00007f09771e4520,0x00007f09771e4528] = 8
handler table [0x00007f09771e4528,0x00007f09771e4540] = 24
[Constant Pool (empty)]
[MachCode]
[Verified Entry Point]
# {method} {0x00007f09490a0498} 'Dfs' '(IIII)V' in 'DfsTest2'
# parm0: rsi = int
# parm1: rdx = int
# parm2: rcx = int
# parm3: r8 = int
# [sp+0x40] (sp of caller)
0x00007f09771e4220: 8984 2400 | c0fe ff55
0x00007f09771e4228: ;*synchronization entry
; - DfsTest2::Dfs@-1 (line 6)
0x00007f09771e4228: 4883 ec30 | 4489 4424 | 0889 4c24 | 0489 1424 | 448b da44 | 0bd9 450b | d841 baff | 7f00 00c4
0x00007f09771e4248: ;*iand {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@10 (line 6)
0x00007f09771e4248: c220 f2ea
0x00007f09771e424c: ;*ifeq {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@15 (line 7)
0x00007f09771e424c: 85ed 7450 | 83fe 0f74 | 5b44 8bde
0x00007f09771e4258: ;*iadd {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@52 (line 13)
0x00007f09771e4258: 41ff c389 | 7424 0c44 | 895c 2410 | 660f 1f44 | 0000 660f
0x00007f09771e426c: ;*iload {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@18 (line 8)
0x00007f09771e426c: 1f44 0000 | c4e2 20f3
0x00007f09771e4274: ;*iand {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@23 (line 8)
0x00007f09771e4274: dd44 8b44 | 2408 450b | c38b 4c24 | 0441 0bcb
0x00007f09771e4284: ;*ixor {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@30 (line 9)
0x00007f09771e4284: 4133 eb8b | 1424 410b
0x00007f09771e428c: ;*ior {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@56 (line 13)
0x00007f09771e428c: d341 d1e0
0x00007f09771e4290: ;*ishr {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@62 (line 13)
0x00007f09771e4290: d1f9 8b74
0x00007f09771e4294: ; {static_call}
0x00007f09771e4294: 2410 90e8
0x00007f09771e4298: ; ImmutableOopMap {}
;*invokestatic Dfs {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@69 (line 13)
0x00007f09771e4298: 84ff ffff
0x00007f09771e429c: ;*ifeq {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@15 (line 7)
0x00007f09771e429c: 85ed 75d0 | 4883 c430 | 5d4d 8b97 | 1001 0000
0x00007f09771e42ac: ; {poll_return}
0x00007f09771e42ac: 4185 02c3
0x00007f09771e42b0: ; {oop(a 'java/lang/Class'{0x000000044084ca48} = 'DfsTest2')}
0x00007f09771e42b0: 49ba 48ca | 8440 0400 | 0000 458b
0x00007f09771e42bc: ;*goto {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@72 (line 14)
0x00007f09771e42bc: 4274 6690 | 4d8b 9f10 | 0100 00c4 | e230 f3dd
0x00007f09771e42cc: ;*ixor {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@30 (line 9)
0x00007f09771e42cc: 4133 e941
0x00007f09771e42d0: ;*iadd {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@43 (line 11)
0x00007f09771e42d0: ffc0 4589
0x00007f09771e42d4: ; ImmutableOopMap {r10=Oop }
;*goto {reexecute=1 rethrow=0 return_oop=0}
; - (reexecute) DfsTest2::Dfs@72 (line 14)
; {poll}
0x00007f09771e42d4: 4274 4185
0x00007f09771e42d8: ;*goto {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@72 (line 14)
0x00007f09771e42d8: 0385 ed75
0x00007f09771e42dc: ;*ifeq {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@15 (line 7)
0x00007f09771e42dc: e3eb c148 | 8bf0 4883
0x00007f09771e42e4: ; {runtime_call _rethrow_Java}
0x00007f09771e42e4: c430 5de9
0x00007f09771e42e8: ;*iload {reexecute=0 rethrow=0 return_oop=0}
; - DfsTest2::Dfs@18 (line 8)
0x00007f09771e42e8: 140e 56f8 | f4f4 f4f4 | f4f4 f4f4 | f4f4 f4f4 | f4f4 f4f4 | f4f4 f4f4
[Stub Code]
0x00007f09771e4300: ; {no_reloc}
0x00007f09771e4300: 48bb 0000 | 0000 0000
0x00007f09771e4308: ; {runtime_call}
0x00007f09771e4308: 0000 e9fb
0x00007f09771e430c: ; {runtime_call ExceptionBlob}
0x00007f09771e430c: ffff ffe9 | 6c2b 55f8
[Deopt Handler Code]
0x00007f09771e4314: e800 0000 | 0048 832c
0x00007f09771e431c: ; {runtime_call DeoptimizationBlob}
0x00007f09771e431c: 2405 e97d | 994a f8f4 | f4f4 f4f4
[/MachCode]
@XZiar
Copy link

XZiar commented Mar 19, 2021

把JVM的asm清理了一下扔进反汇编是这样:

mov dword ptr [rsp - 0x14000], eax
push rbp
sub rsp, 0x30
mov dword ptr [rsp + 8], r8d
mov dword ptr [rsp + 4], ecx
mov dword ptr [rsp], edx
mov r11d, edx
or r11d, ecx
or r11d, r8d
mov r10d, 0x7fff
andn ebp, r11d, r10d
test ebp, ebp
je 0x7f09771e42a0
cmp esi, 0xf
je 0x7f09771e42b0
mov r11d, esi
inc r11d
mov dword ptr [rsp + 0xc], esi
mov dword ptr [rsp + 0x10], r11d
nop word ptr [rax + rax]
nop word ptr [rax + rax]
blsi r11d, ebp
mov r8d, dword ptr [rsp + 8]
or r8d, r11d
mov ecx, dword ptr [rsp + 4]
or ecx, r11d
xor ebp, r11d
mov edx, dword ptr [rsp]
or edx, r11d
shl r8d, 1
sar ecx, 1
mov esi, dword ptr [rsp + 0x10]
nop 
call 0x7f09771e4220
test ebp, ebp
jne 0x7f09771e4270
add rsp, 0x30
pop rbp
mov r10, qword ptr [r15 + 0x110]
test dword ptr [r10], eax
ret 
movabs r10, 0x44084ca48
mov r8d, dword ptr [r10 + 0x74]
nop 
mov r11, qword ptr [r15 + 0x110]
blsi r9d, ebp
xor ebp, r9d
inc r8d
mov dword ptr [r10 + 0x74], r8d
test dword ptr [r11], eax
test ebp, ebp
jne 0x7f09771e42c0
jmp 0x7f09771e42a0
mov rsi, rax
add rsp, 0x30
pop rbp
jmp 0x7f096f745100

然后单独Dfs拿gcc Ofast是这样:

static cnt
Dfs(int, int, int, int):
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbp
        push    rbx
        mov     ebx, edx
        or      ebx, ecx
        or      ebx, esi
        not     ebx
        sub     rsp, 24
        and     ebx, 32767
        je      .L1
        cmp     edi, 15
        jne     .L13
        mov     edx, DWORD PTR cnt[rip]
.L4:
        mov     eax, ebx
        mov     ecx, ebx
        add     edx, 1
        neg     eax
        and     eax, ebx
        xor     ebx, eax
        cmp     eax, ecx
        jne     .L4
        mov     DWORD PTR cnt[rip], edx
.L1:
        add     rsp, 24
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret
.L13:
        mov     ebp, esi
        mov     r12d, edx
        lea     r15d, [rdi+1]
        mov     r13d, ecx
.L3:
        mov     eax, ebx
        mov     ecx, r13d
        mov     edx, r12d
        mov     esi, ebp
        neg     eax
        mov     edi, r15d
        mov     r14d, ebx
        and     eax, ebx
        or      ecx, eax
        or      edx, eax
        or      esi, eax
        mov     DWORD PTR [rsp+12], eax
        add     ecx, ecx
        sar     edx
        xor     ebx, eax
        call    Dfs(int, int, int, int)
        mov     eax, DWORD PTR [rsp+12]
        cmp     r14d, eax
        jne     .L3
        add     rsp, 24
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret

看起来大体结构是相似的,自只有些小区别:

  • 一些指令的使用不同,比如gcc的not+and+je和jvm的mov+andn+test+je,还有jvm用了blsi

  • gcc第二个跳转是jne,jvm是je,分支的顺序调换了一下(可能是PGO的效果,觉得row==n几率比较小?)

  • gcc pop比较多……

@oxalica
Copy link

oxalica commented Mar 19, 2021

Sources:

dfs.c

#include <stdio.h>

static const int n = 15;
static int cnt;

static void Dfs (int row, int shu, int pie, int na) {
    int ave = ((1 << n) - 1) & ~(shu | pie | na);
    while (ave) {
        int p = ave & -ave;
        ave ^= p;
        if (row == n)
            ++cnt;
        else
            Dfs(row + 1, shu | p, (pie | p) >> 1, (na | p) << 1);
    }
}

int main () {
    Dfs(1, 0, 0, 0);
    printf("%d\n", cnt);
}

Dfs.java

public class Dfs {
    private static final int n = 15;
    private static int cnt = 0;

    private static void Dfs (int row, int shu, int pie, int na) {
        int ave = ((1 << n) - 1) & ~(shu | pie | na);
        while (ave != 0) {
            int p = ave & -ave;
            ave ^= p;
            if (row == n)
                ++cnt;
            else
                Dfs(row + 1, shu | p, (pie | p) >> 1, (na | p) << 1);
        }
    }


    public static void main(String[] args) {
        Dfs(1, 0, 0, 0);
        System.out.println(cnt);
    }
}

dfs.rs

const N: i32 = 15;

static mut CNT: i32 = 0;

fn dfs(row: i32, shu: i32, pie: i32, na: i32) {
    let mut ave = ((1 << N) - 1) & !(shu | pie | na);
    while ave != 0 {
        let p = ave & -ave;
        ave ^= p;
        if row == N {
            unsafe { CNT += 1 };
        } else {
            dfs(row + 1, shu | p, (pie | p) >> 1, (na | p) << 1);
        }
    }
}

fn main() {
    dfs(1, 0, 0, 0);
    println!("{}", unsafe { CNT });
}

Dfs.hs:

{-# LANGUAGE BangPatterns #-}
import Data.IORef
import Data.Bits
import GHC.IO

n = 15

{-# NOINLINE cnt #-}
cnt = unsafePerformIO $ newIORef 0

dfs :: Int -> Int -> Int -> Int -> IO ()
dfs !row !shu !pie !na = go (((1 `shiftL` n) - 1) .&. complement (shu .|. pie .|. na))
  where go 0    = pure ()
        go !ave = if row == n
                    then modifyIORef cnt (+1)
                    else do
                      dfs (row + 1) (shu .|. p) ((pie .|. p) `shiftR` 1) ((na .|. p) `shiftL` 1)
                      go $ ave `xor` p
          where !p = ave .&. (-ave)

    
main = do
    dfs 1 0 0 0
    cnt' <- readIORef cnt
    print cnt'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment