Skip to content

Instantly share code, notes, and snippets.

@aqrit
Last active March 28, 2017 21:38
Show Gist options
  • Save aqrit/4e33614c64b5be81d88b6a630eb77731 to your computer and use it in GitHub Desktop.
Save aqrit/4e33614c64b5be81d88b6a630eb77731 to your computer and use it in GitHub Desktop.
Base64 decoder SSE2
; Base64 decoder test using SSE2 instructions
; the program doesn't do '=' chars, bad chars, or bounds checks...
;
; License: WTFPL
; by: aqrit <bitpatch.com> in 2016
;
; build instructions:
; yasm -f x64 -m AMD64 base64_decode_sse2_x64.asm
; Golink base64_decode_sse2_x64.obj kernel32.dll user32.dll
;;;;;;;;;;;;;;;;
; Entry point exported to the linker, and the two imported routines that are
; called at the end of the program.
global start
extern ExitProcess, MessageBoxA
SECTION .rdata
; packed_constants is loaded with movaps, hence the 16-byte alignment.
; The setup code at `start` splits these four dwords as follows
; (bytes listed in memory order, i.e. least significant byte first):
;   dword 0 = 66 1A FB D4 : 0x66 = 0x7F - 25, 0x1A = 26,
;                           0xFB = '+' value after the "0-9" roll down,
;                           0xD4 = '+' fix-up after the "A-Z" roll down
;   dword 1 = 46 05 25 76 : 0x46 = 0x7F - '9', 0x05 = 0x7F - 'z',
;                           0x25 = 0x7F - 'Z', 0x76 = 0x7F - 9
;   dword 2 = 0x00030F3F  : pack mask
;   dword 3 = 0x00401004  : source bytes of the pack multiplier
align 16
packed_constants:
dd 0xD4FB1A66, 0x76250546, 0x00030F3F, 0x00401004
; NUL-terminated title string; passed in r8 to MessageBoxA.
caption:
db "decoded moby dick excerpt",0
; Base64 input text handed to the decoder (rsi points here at start).
; 1484 characters in total; the last two are '=' padding. No terminator or
; length is stored: the decode loop stops on the first 16-byte block that
; contains a byte outside the Base64 alphabet, and because 1484 is not a
; multiple of 16 the last block read extends 4 bytes past this data.
message:
db "Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ"
db "yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG"
db "FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh"
db "vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5"
db "IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ"
db "mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm"
db "VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV"
db "2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l"
db "dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma"
db "W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm"
db "VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V"
db "jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v"
db "cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa"
db "W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG"
db "UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8"
db "gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz"
db "dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc"
db "m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2"
db "hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV"
db "0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l"
db "IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd"
db "2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg=="
SECTION .data
; Zero-filled destination for the decoded text. The decoder writes here
; through rdi, and the same memory is later displayed as a C string.
OUTPUT_CAPACITY equ 0x600               ; bytes reserved for the output
buffer:
        times OUTPUT_CAPACITY db 0
SECTION .text
;------------------------------------------------------------------------------
; start -- process entry point (Win64).
;
; Loads the source and destination pointers, expands packed_constants into
; per-byte broadcast registers, then falls through into the decode loop.
;
; Register roles on leaving this block:
;   rsi   = next input byte (Base64 text)
;   rdi   = next output byte (decoded text)
;   xmm5  = pack mask, 0x00030F3F in every dword
;   xmm6  = pack multiplier, words 0x0400,0x1000,0x4000,0x0000 (twice)
;   xmm7  = 0xFF in every byte
;   xmm8..xmm15 = one constant byte each, broadcast to all 16 lanes
;
; All static data is addressed RIP-relative (`rel`). Without it the operands
; are assembled as absolute 32-bit addresses, which only work when the image
; is loaded below 2 GB and which some linkers reject outright.
;------------------------------------------------------------------------------
start:
        lea     rsi, [rel message]
        lea     rdi, [rel buffer]
;;;;;;;;;; setup constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        movaps  xmm11, [rel packed_constants]   ; aligned load (align 16 in .rdata)
        pxor    xmm6, xmm6                      ; zero: becomes the low byte of each multiplier word
        pcmpeqb xmm7, xmm7                      ; 0xFF = slash after '0-9' roll down
        pshufd  xmm14, xmm11, 0xFF              ; broadcast dword 3 (multiplier bytes); xmm14 is reused below
        pshufd  xmm5, xmm11, 0xAA               ; pack mask
        punpcklbw xmm11, xmm11                  ; duplicate each of the 8 low constant bytes
        punpcklbw xmm6, xmm14                   ; multiplier: words = (constant byte << 8)
        movdqa  xmm15, xmm11
        punpckhwd xmm11, xmm11                  ; bytes 4..7 of the table, each filling one dword
        punpcklwd xmm15, xmm15                  ; bytes 0..3 of the table, each filling one dword
        pshufd  xmm8, xmm11, 0x00               ; 0x46 = 0x7F - '9'
        pshufd  xmm9, xmm11, 0x55               ; 0x05 = 0x7F - 'z'
        pshufd  xmm10, xmm11, 0xAA              ; 0x25 = 0x7F - 'Z'
        pshufd  xmm11, xmm11, 0xFF              ; 0x76 = 0x7F - 9 ('9'-'0')
        pshufd  xmm12, xmm15, 0x00              ; 0x66 = 0x7F - 25 ('Z'-'A') ('z'-'a')
        pshufd  xmm13, xmm15, 0x55              ; 0x1A = 26
        pshufd  xmm14, xmm15, 0xAA              ; 0xFB = plus sign value after '0-9' roll down
        pshufd  xmm15, xmm15, 0xFF              ; 0xD4 = plus sign shift after 'A-Z' roll down
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main decode loop.
;
; Each pass reads 16 input bytes at rsi, maps them to 6-bit values, packs
; those into 12 output bytes and stores the result at rdi.
;   - rsi advances by 16 and rdi by 12. The store itself is 16 bytes wide
;     (12 data bytes followed by 4 zero bytes), so the zero tail of one store
;     is overwritten by the next store.
;   - There is no length counter. The loop repeats while eax, the mask of
;     byte sign bits taken after the lookup, is zero; it ends after the first
;     block in which the lookup left at least one byte "signed" (a byte that
;     is not part of the Base64 alphabet). That block is still packed and
;     stored before the loop is left.
;   - xmm5..xmm15 hold the constants prepared before the loop and are only
;     read here; xmm0..xmm4 are scratch.
align 8
lbl_loop:
movdqu xmm0,oword[rsi] ; read in 16 bytes from src // xmmword ptr
add rsi,16
;;;;;;;;;;;;;;; lookup ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; in: byte array of Base64 characters
; out: byte array of 6-bit values
;
; A-Z to 0-25
; a-z to 26-51
; 0-9 to 52-61
; '+' to 62
; '/' to 63
;
; "roll up" (paddb) moves the top of a character range to 0x7F; "roll down"
; (psubsb, signed saturating) then moves the bottom of the range to 0.
; Bytes outside the range come out with bit 7 set, so the unsigned minimum
; (pminub) of the candidates picks whichever range actually matched.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
movdqa xmm1,xmm8 ; preserve
movdqa xmm2,xmm9 ; preserve
movdqa xmm3,xmm14 ; preserve
movdqa xmm4,xmm7 ; preserve
paddb xmm1,xmm0 ; "0-9" roll up
paddb xmm2,xmm0 ; "a-z" roll up
psubsb xmm1,xmm11 ; "0-9" roll down
psubsb xmm2,xmm12 ; "a-z" roll down
pcmpeqb xmm4,xmm1 ; match '/' ( forward slash )
pcmpeqb xmm3,xmm1 ; match '+' ( plus sign )
paddusb xmm1,xmm13 ; ('0'=26 thru '9'=35) else signed
psubusb xmm4,xmm10 ; 0xDA = 0xFF - 0x25
paddb xmm0,xmm10 ; "A-Z" roll up
pxor xmm1,xmm4 ; ( '/' = 37 ) = FF ^ DA
pand xmm3,xmm15 ; 0xD4 where the byte was '+', 0 elsewhere
psubsb xmm0,xmm12 ; "A-Z" roll down
pminub xmm1,xmm2 ; merge
pxor xmm0,xmm3 ; ( '+' = 62 )
paddusb xmm1,xmm13 ; ('a'=26 thru '9'=61, '/'=63) else signed
pminub xmm0,xmm1 ; merge
;
pmovmskb eax,xmm0 ; set ax if bad char (bit i = sign bit of byte i)
;;;;;;;;;;;;;;; pack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;in: [??wwwwww ??xxxxxx ??yyyyyy ??zzzzzz][...][...][...]
;... x4 x3 x2 x1 x0 ?? ?? w5 w4 w3 w2 w1 w0
;
;out: [wwwwwwxx xxxxyyyy yyzzzzzz][...][...][00000000 00000000]
;... y1 y0 z5 z4 z3 z2 z1 z0 x3 x2 x1 x0 y5 y4 y3 y2 w5 w4 w3 w2 w1 w0 x5 x4
;
; Method: every dword is shifted right by 6 and its bytes are interleaved
; with the masked original bytes, giving words of (masked byte << 8 | shifted
; byte). pmulhuw keeps the high 16 bits of each product, so multiplying by
; 0x0400 / 0x1000 / 0x4000 / 0 acts as a per-word right shift by 6 / 4 / 2
; and clears the fourth word. The shuffles and byte shifts then line the 12
; result words up so that packuswb yields 12 contiguous bytes.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
movdqa xmm2,xmm0
psrld xmm0,6
pand xmm2,xmm5 ; keep w & 0x3F, x & 0x0F, y & 0x03, drop z
movdqa xmm1,xmm0
punpcklbw xmm0,xmm2 ; words for input bytes 0..7
punpckhbw xmm1,xmm2 ; words for input bytes 8..15
pmulhuw xmm0,xmm6
pmulhuw xmm1,xmm6
pshuflw xmm0,xmm0,10010011b ; rotate the cleared word to the front of the low half
pshuflw xmm1,xmm1,10010011b
pslldq xmm0,2 ; 6 result words now occupy the top of xmm0
psrldq xmm1,2 ; 6 result words now occupy the bottom of xmm1
packuswb xmm0,xmm1 ; words -> bytes (unsigned saturation)
psrldq xmm0,2 ; drop the 2 leading zero bytes: 12 data bytes + 4 zero bytes
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
movdqu [rdi],xmm0 ; 16-byte store, only 12 bytes are payload
add rdi,12
test eax,eax
jz lbl_loop ; continue while the block had no bad char
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; show decoded message... then exit
;
; On entry (fall-through from the decode loop):
;   eax = nonzero mask; bit i is set when input byte i of the last block was
;         not a Base64 character (e.g. '=' padding or the byte after the text)
;   rdi = buffer position 12 bytes past the start of the last block's output
;
; The last block was stored in full, so the bytes produced from the bad
; characters are in the buffer too. Cut the string at the first output byte
; that is not fully backed by valid input: with p valid characters in front
; of the first bad one, floor(p * 3 / 4) output bytes of that block are good.
        bsf     eax, eax                        ; p = index of the first bad char (eax != 0 here)
        lea     eax, [rax + rax*2]              ; p * 3
        shr     eax, 2                          ; floor(p * 3 / 4), range 0..11
        mov     byte [rdi + rax - 12], 0        ; NUL-terminate the decoded text

; MessageBoxA(hWnd = NULL, lpText = buffer, lpCaption = caption, uType = 0)
; 0x28 = 32 bytes of shadow space + 8 bytes of padding; assumes rsp was
; 8 (mod 16) on entry, as after a call -- TODO confirm for this entry point.
        sub     rsp, 0x28
        xor     ecx, ecx                        ; hWnd = NULL (32-bit write zero-extends)
        lea     rdx, [rel buffer]               ; lpText
        lea     r8, [rel caption]               ; lpCaption
        xor     r9d, r9d                        ; uType = 0
        call    MessageBoxA
;
        xor     ecx, ecx                        ; exit code 0
        call    ExitProcess
@aqrit
Copy link
Author

aqrit commented Mar 28, 2017

@mayeut,
yes.
sorry for the late reply, missed your comment somehow

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment