Skip to content

Instantly share code, notes, and snippets.

@cartazio
Created April 10, 2013 00:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cartazio/5350669 to your computer and use it in GitHub Desktop.
Save cartazio/5350669 to your computer and use it in GitHub Desktop.
nice assembly generated from functional haskell code

# GHC asm backend

                                              _s1Mj_info:
0x00000000000018c0 4983C410                        addq       $0x10, %r12
0x00000000000018c4 4D3BA590000000                  cmpq       %ds:0x90(%r13), %r12
0x00000000000018cb 0F873D010000                    jal        $0x1a0e
0x00000000000018d1 48B8FFFF00000000FFFF            movq       $0xffff00000000ffff, %rax
0x00000000000018db 488B4B07                        movq       %ds:0x7(%rbx), %rcx
0x00000000000018df 4821C1                          andq       %rax, %rcx
0x00000000000018e2 B80000FFFF                      movl       $0xffff0000, %eax
0x00000000000018e7 488B5307                        movq       %ds:0x7(%rbx), %rdx
0x00000000000018eb 48C1EA10                        shrq       $0x10, %rdx
0x00000000000018ef 4821C2                          andq       %rax, %rdx
0x00000000000018f2 4809CA                          orq        %rcx, %rdx
0x00000000000018f5 B80000FFFF                      movl       $0xffff0000, %eax
0x00000000000018fa 488B5B07                        movq       %ds:0x7(%rbx), %rbx
0x00000000000018fe 4821C3                          andq       %rax, %rbx
0x0000000000001901 48C1E310                        shlq       $0x10, %rbx
0x0000000000001905 4809D3                          orq        %rdx, %rbx
0x0000000000001908 48B8FF0000FFFF0000FF            movq       $0xff0000ffff0000ff, %rax
0x0000000000001912 4889D9                          movq       %rbx, %rcx
0x0000000000001915 4821C1                          andq       %rax, %rcx
0x0000000000001918 48B800FF000000FF0000            movq       $0xff000000ff00, %rax
0x0000000000001922 4889DA                          movq       %rbx, %rdx
0x0000000000001925 48C1EA08                        shrq       $0x8, %rdx
0x0000000000001929 4821C2                          andq       %rax, %rdx
0x000000000000192c 4809CA                          orq        %rcx, %rdx
0x000000000000192f 48B800FF000000FF0000            movq       $0xff000000ff00, %rax
0x0000000000001939 4821C3                          andq       %rax, %rbx
0x000000000000193c 48C1E308                        shlq       $0x8, %rbx
0x0000000000001940 4809D3                          orq        %rdx, %rbx
0x0000000000001943 48B80FF00FF00FF00FF0            movq       $0xf00ff00ff00ff00f, %rax
0x000000000000194d 4889D9                          movq       %rbx, %rcx
0x0000000000001950 4821C1                          andq       %rax, %rcx
0x0000000000001953 48B8F000F000F000F000            movq       $0xf000f000f000f0, %rax
0x000000000000195d 4889DA                          movq       %rbx, %rdx
0x0000000000001960 48C1EA04                        shrq       $0x4, %rdx
0x0000000000001964 4821C2                          andq       %rax, %rdx
0x0000000000001967 4809CA                          orq        %rcx, %rdx
0x000000000000196a 48B8F000F000F000F000            movq       $0xf000f000f000f0, %rax
0x0000000000001974 4821C3                          andq       %rax, %rbx
0x0000000000001977 48C1E304                        shlq       $0x4, %rbx
0x000000000000197b 4809D3                          orq        %rdx, %rbx
0x000000000000197e 48B8C3C3C3C3C3C3C3C3            movq       $0xc3c3c3c3c3c3c3c3, %rax
0x0000000000001988 4889D9                          movq       %rbx, %rcx
0x000000000000198b 4821C1                          andq       %rax, %rcx
0x000000000000198e 48B80C0C0C0C0C0C0C0C            movq       $0xc0c0c0c0c0c0c0c, %rax
0x0000000000001998 4889DA                          movq       %rbx, %rdx
0x000000000000199b 48C1EA02                        shrq       $0x2, %rdx
0x000000000000199f 4821C2                          andq       %rax, %rdx
0x00000000000019a2 4809CA                          orq        %rcx, %rdx
0x00000000000019a5 48B80C0C0C0C0C0C0C0C            movq       $0xc0c0c0c0c0c0c0c, %rax
0x00000000000019af 4821C3                          andq       %rax, %rbx
0x00000000000019b2 48C1E302                        shlq       $0x2, %rbx
0x00000000000019b6 4809D3                          orq        %rdx, %rbx
0x00000000000019b9 488D0500000000                  leaq       %ds:0x19c0, %rax
0x00000000000019c0 49894424F8                      movq       %rax, %ds:0xfffffffffffffff8(%r12) ; XREF=0x19b9
0x00000000000019c5 48B89999999999999999            movq       $0x9999999999999999, %rax
0x00000000000019cf 4889D9                          movq       %rbx, %rcx
0x00000000000019d2 4821C1                          andq       %rax, %rcx
0x00000000000019d5 48B82222222222222222            movq       $0x2222222222222222, %rax
0x00000000000019df 4889DA                          movq       %rbx, %rdx
0x00000000000019e2 48D1EA                          shrq       $0x1, %rdx
0x00000000000019e5 4821C2                          andq       %rax, %rdx
0x00000000000019e8 4809CA                          orq        %rcx, %rdx
0x00000000000019eb 48B82222222222222222            movq       $0x2222222222222222, %rax
0x00000000000019f5 4821C3                          andq       %rax, %rbx
0x00000000000019f8 48D1E3                          shlq       $0x1, %rbx
0x00000000000019fb 4809D3                          orq        %rdx, %rbx
0x00000000000019fe 49891C24                        movq       %rbx, %ds:%r12

LLVM generated assembly

                                              _s1yV_info:
0x00000000000018c0 4C89E0                          movq       %r12, %rax
0x00000000000018c3 4C8D6010                        leaq       %ds:0x10(%rax), %r12
0x00000000000018c7 4D3BA590000000                  cmpq       %ds:0x90(%r13), %r12
0x00000000000018ce 7611                            jbel       $0x18e1
0x00000000000018d0 49C785C000000010000000          movq       $0x10, %ds:0xc0(%r13)
0x00000000000018db 498B45F0                        movq       %ds:0xfffffffffffffff0(%r13), %rax
0x00000000000018df FFE0                            jmpq       *%rax
0x00000000000018e1 488B7B07                        movq       %ds:0x7(%rbx), %rdi         ; XREF=0x18ce
0x00000000000018e5 48BAFFFF00000000FFFF            movq       $0xffff00000000ffff, %rdx
0x00000000000018ef 4821FA                          andq       %rdi, %rdx
0x00000000000018f2 4889F9                          movq       %rdi, %rcx
0x00000000000018f5 48C1E910                        shrq       $0x10, %rcx
0x00000000000018f9 81E10000FFFF                    andl       $0xffff0000, %ecx
0x00000000000018ff 48C1E710                        shlq       $0x10, %rdi
0x0000000000001903 48BE00000000FFFF0000            movq       $0xffff00000000, %rsi
0x000000000000190d 4821FE                          andq       %rdi, %rsi
0x0000000000001910 4809D1                          orq        %rdx, %rcx
0x0000000000001913 488B3D00000000                  movq       %ds:0x191a, %rdi
0x000000000000191a 48BBFF0000FFFF0000FF            movq       $0xff0000ffff0000ff, %rbx   ; XREF=0x1913
0x0000000000001924 48BA0000FF000000FF00            movq       $0xff000000ff0000, %rdx
0x000000000000192e 49BA0FF00FF00FF00FF0            movq       $0xf00ff00ff00ff00f, %r10
0x0000000000001938 49BB000F000F000F000F            movq       $0xf000f000f000f00, %r11
0x0000000000001942 49BE00FF000000FF0000            movq       $0xff000000ff00, %r14
0x000000000000194c 48897808                        movq       %rdi, %ds:0x8(%rax)
0x0000000000001950 4809CE                          orq        %rcx, %rsi
0x0000000000001953 4889F7                          movq       %rsi, %rdi
0x0000000000001956 48C1E708                        shlq       $0x8, %rdi
0x000000000000195a 4821D7                          andq       %rdx, %rdi
0x000000000000195d 49B9C3C3C3C3C3C3C3C3            movq       $0xc3c3c3c3c3c3c3c3, %r9
0x0000000000001967 4821DE                          andq       %rbx, %rsi
0x000000000000196a 48C1E908                        shrq       $0x8, %rcx
0x000000000000196e 49B89999999999999999            movq       $0x9999999999999999, %r8
0x0000000000001978 4C21F1                          andq       %r14, %rcx
0x000000000000197b 48B80C0C0C0C0C0C0C0C            movq       $0xc0c0c0c0c0c0c0c, %rax
0x0000000000001985 4809F1                          orq        %rsi, %rcx
0x0000000000001988 4809CF                          orq        %rcx, %rdi
0x000000000000198b 4889FE                          movq       %rdi, %rsi
0x000000000000198e 48C1E604                        shlq       $0x4, %rsi
0x0000000000001992 4C21DE                          andq       %r11, %rsi
0x0000000000001995 48BB3030303030303030            movq       $0x3030303030303030, %rbx
0x000000000000199f 4C21D7                          andq       %r10, %rdi
0x00000000000019a2 48C1E904                        shrq       $0x4, %rcx
0x00000000000019a6 48BAF000F000F000F000            movq       $0xf000f000f000f0, %rdx
0x00000000000019b0 4821CA                          andq       %rcx, %rdx
0x00000000000019b3 4809FA                          orq        %rdi, %rdx
0x00000000000019b6 4809D6                          orq        %rdx, %rsi
0x00000000000019b9 488D0CB500000000                leaq       %ds:0x0(,%rsi,4), %rcx
0x00000000000019c1 4821D9                          andq       %rbx, %rcx
0x00000000000019c4 4C21CE                          andq       %r9, %rsi
0x00000000000019c7 48C1EA02                        shrq       $0x2, %rdx
0x00000000000019cb 48BF2222222222222222            movq       $0x2222222222222222, %rdi
0x00000000000019d5 4821C2                          andq       %rax, %rdx
0x00000000000019d8 48BB4444444444444444            movq       $0x4444444444444444, %rbx
0x00000000000019e2 4809F2                          orq        %rsi, %rdx
0x00000000000019e5 4809D1                          orq        %rdx, %rcx
0x00000000000019e8 488D0409                        leaq       %ds:(%rcx,%rcx), %rax
0x00000000000019ec 4821D8                          andq       %rbx, %rax
0x00000000000019ef 4C21C1                          andq       %r8, %rcx
0x00000000000019f2 48D1EA                          shrq       $0x1, %rdx
0x00000000000019f5 4821FA                          andq       %rdi, %rdx
0x00000000000019f8 4809CA                          orq        %rcx, %rdx
0x00000000000019fb 4809C2                          orq        %rax, %rdx
0x00000000000019fe 49891424                        movq       %rdx, %ds:%r12
0x0000000000001a02 488B4508                        movq       %ss:0x8(%rbp), %rax
0x0000000000001a06 488D6D08                        leaq       %ss:0x8(%rbp), %rbp
0x0000000000001a0a 498D5C24F9                      leaq       %ds:0xfffffffffffffff9(%r12), %rbx
0x0000000000001a0f FFE0                            jmpq       *%rax
0x0000000000001a11 0F1F8000000000                  nopl       %ds:0x0(%rax)

The Haskell code

-- | Pass a word through five field-swapping stages with distances
-- 16, 8, 4, 2 and 1, applied in that order.
--
-- Each stage exchanges the bit field selected by its mask with the field
-- of the same width sitting @dist@ bits above it; every other bit is kept
-- in place. The masks are written for a 64-bit 'Word'.
--
-- Assuming @<<@ and @>>@ are this file's logical left/right shift
-- operators (they are not defined in this block -- TODO confirm), the
-- lower 32 bits end up on the even bit positions and the upper 32 bits on
-- the odd ones, e.g. @0x00000000FFFFFFFF@ becomes @0x5555555555555555@.
--
-- NOTE(review): the original author remarked that "the 16 shift should be
-- conditional" -- presumably on the width of 'Word'; confirm before
-- relying on this function where 'Word' is not 64 bits wide.
outerShuffle64A :: Word -> Word
outerShuffle64A !w = by1
  where
    -- Stages are chained from the widest swap distance down to the narrowest.
    by16 = exchange 0x00000000FFFF0000 0xFFFF00000000FFFF 16 w
    by8  = exchange 0x0000FF000000FF00 0xFF0000FFFF0000FF 8  by16
    by4  = exchange 0x00F000F000F000F0 0xF00FF00FF00FF00F 4  by8
    by2  = exchange 0x0C0C0C0C0C0C0C0C 0xC3C3C3C3C3C3C3C3 2  by4
    by1  = exchange 0x2222222222222222 0x9999999999999999 1  by2

    -- One stage: bits under 'moved' travel up by 'dist', the bits 'dist'
    -- above them travel down into 'moved', and bits under 'kept' stay put.
    exchange moved kept dist v =
      ((v .&. moved) << dist) .|. ((v >> dist) .&. moved) .|. (v .&. kept)
{-# INLINE outerShuffle64A #-}

What I find pretty neat is that the LLVM code uses more of the available registers. I still need to do some benchmarks on it though :)

@cartazio
Copy link
Author

outerShuffle64A repeated 1,000 times takes an average of 8.7 microseconds with the LLVM-generated code. Pretty good!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment