Created
June 20, 2016 17:51
-
-
Save zbjornson/0587e03a37fe503d05901f8b728d0ff2 to your computer and use it in GitHub Desktop.
swap32 without alignment check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void Swap32(const FunctionCallbackInfo<Value>& args) { | |
00007FF79E253100 push rbx | |
00007FF79E253102 push rdi | |
00007FF79E253103 sub rsp,28h | |
00007FF79E253107 mov rdi,rcx | |
Environment* env = Environment::GetCurrent(args); | |
00007FF79E25310A call node::Environment::GetCurrent (07FF79E22D240h) | |
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | |
00007FF79E25310F cmp dword ptr [rdi+10h],0 | |
Environment* env = Environment::GetCurrent(args); | |
00007FF79E253113 mov rbx,rax | |
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | |
00007FF79E253116 jg node::Buffer::Swap32+25h (07FF79E253125h) | |
00007FF79E253118 mov rdx,qword ptr [rdi] | |
00007FF79E25311B mov rcx,qword ptr [rdx+8] | |
00007FF79E25311F add rcx,60h | |
00007FF79E253123 jmp node::Buffer::Swap32+29h (07FF79E253129h) | |
00007FF79E253125 mov rcx,qword ptr [rdi+8] | |
00007FF79E253129 mov rdx,qword ptr [rcx] | |
00007FF79E25312C movzx eax,dl | |
00007FF79E25312F and al,3 | |
00007FF79E253131 cmp al,1 | |
00007FF79E253133 jne node::Buffer::Swap32+208h (07FF79E253308h) | |
00007FF79E253139 mov rax,qword ptr [rdx-1] | |
00007FF79E25313D cmp byte ptr [rax+0Bh],0BDh | |
00007FF79E253141 jne node::Buffer::Swap32+208h (07FF79E253308h) | |
00007FF79E253147 mov rcx,qword ptr [rcx] | |
00007FF79E25314A call v8::internal::JSTypedArray::type (07FF79E62E340h) | |
00007FF79E25314F cmp eax,2 | |
00007FF79E253152 jne node::Buffer::Swap32+208h (07FF79E253308h) | |
SPREAD_ARG(args[0], ts_obj); | |
00007FF79E253158 cmp dword ptr [rdi+10h],0 | |
00007FF79E25315C mov qword ptr [rsp+48h],rbp | |
00007FF79E253161 mov qword ptr [rsp+50h],rsi | |
00007FF79E253166 mov qword ptr [rsp+20h],r14 | |
00007FF79E25316B jg node::Buffer::Swap32+7Ah (07FF79E25317Ah) | |
00007FF79E25316D mov rax,qword ptr [rdi] | |
00007FF79E253170 mov rcx,qword ptr [rax+8] | |
00007FF79E253174 add rcx,60h | |
00007FF79E253178 jmp node::Buffer::Swap32+7Eh (07FF79E25317Eh) | |
00007FF79E25317A mov rcx,qword ptr [rdi+8] | |
00007FF79E25317E mov rdx,qword ptr [rcx] | |
00007FF79E253181 movzx eax,dl | |
00007FF79E253184 and al,3 | |
00007FF79E253186 cmp al,1 | |
00007FF79E253188 jne node::Buffer::Swap32+0A1h (07FF79E2531A1h) | |
00007FF79E25318A mov rax,qword ptr [rdx-1] | |
00007FF79E25318E cmp byte ptr [rax+0Bh],0BDh | |
00007FF79E253192 jne node::Buffer::Swap32+0A1h (07FF79E2531A1h) | |
00007FF79E253194 mov rcx,qword ptr [rcx] | |
00007FF79E253197 call v8::internal::JSTypedArray::type (07FF79E62E340h) | |
00007FF79E25319C cmp eax,2 | |
00007FF79E25319F je node::Buffer::Swap32+0BAh (07FF79E2531BAh) | |
00007FF79E2531A1 mov r8d,4D1h | |
00007FF79E2531A7 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)] | |
00007FF79E2531AE lea rcx,[string L"(args[0])->IsUint8Ar"... (07FF79EFB4B10h)] | |
00007FF79E2531B5 call _wassert (07FF79EB14464h) | |
00007FF79E2531BA cmp dword ptr [rdi+10h],0 | |
00007FF79E2531BE jg node::Buffer::Swap32+0CDh (07FF79E2531CDh) | |
00007FF79E2531C0 mov rax,qword ptr [rdi] | |
00007FF79E2531C3 mov rbp,qword ptr [rax+8] | |
00007FF79E2531C7 add rbp,60h | |
00007FF79E2531CB jmp node::Buffer::Swap32+0D1h (07FF79E2531D1h) | |
00007FF79E2531CD mov rbp,qword ptr [rdi+8] | |
00007FF79E2531D1 lea rdx,[rsp+40h] | |
00007FF79E2531D6 mov rcx,rbp | |
00007FF79E2531D9 call v8::ArrayBufferView::Buffer (07FF79E4D1220h) | |
00007FF79E2531DE mov rcx,rbp | |
00007FF79E2531E1 mov rbx,rax | |
00007FF79E2531E4 call v8::ArrayBufferView::ByteLength (07FF79E4D15D0h) | |
00007FF79E2531E9 mov rcx,rbp | |
00007FF79E2531EC mov rsi,rax | |
00007FF79E2531EF call v8::ArrayBufferView::ByteOffset (07FF79E4D1560h) | |
00007FF79E2531F4 mov rcx,qword ptr [rbx] | |
00007FF79E2531F7 mov r14,rax | |
00007FF79E2531FA mov rbp,qword ptr [rsp+48h] | |
00007FF79E2531FF mov rdx,qword ptr [rcx] | |
00007FF79E253202 add r14,qword ptr [rdx+1Fh] | |
00007FF79E253206 test rsi,rsi | |
00007FF79E253209 je node::Buffer::Swap32+129h (07FF79E253229h) | |
00007FF79E25320B test r14,r14 | |
00007FF79E25320E jne node::Buffer::Swap32+129h (07FF79E253229h) | |
00007FF79E253210 mov r8d,4D1h | |
00007FF79E253216 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)] | |
00007FF79E25321D lea rcx,[string L"(ts_obj_data) != (nu"... (07FF79EFB4A60h)] | |
00007FF79E253224 call _wassert (07FF79EB14464h) | |
CHECK_EQ(ts_obj_length % 4, 0); | |
00007FF79E253229 test sil,3 | |
00007FF79E25322D je node::Buffer::Swap32+148h (07FF79E253248h) | |
00007FF79E25322F mov r8d,4D3h | |
00007FF79E253235 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)] | |
00007FF79E25323C lea rcx,[string L"(ts_obj_length % 4) "... (07FF79EFB4EB0h)] | |
00007FF79E253243 call _wassert (07FF79EB14464h) | |
int align = reinterpret_cast<uintptr_t>(ts_obj_data) % sizeof(uint32_t); | |
align = 0; | |
if (align == 0) { | |
uint32_t* data32 = reinterpret_cast<uint32_t*>(ts_obj_data); | |
size_t len32 = ts_obj_length / 4; | |
00007FF79E253248 shr rsi,2 | |
for (size_t i = 0; i < len32; i++) { | |
00007FF79E25324C xor ecx,ecx | |
00007FF79E25324E test rsi,rsi | |
00007FF79E253251 je node::Buffer::Swap32+1C2h (07FF79E2532C2h) | |
00007FF79E253253 cmp rsi,8 | |
00007FF79E253257 jb node::Buffer::Swap32+1A3h (07FF79E2532A3h) | |
data32[i] = BSWAP_INTRINSIC_4(data32[i]); | |
00007FF79E253259 cmp dword ptr [__isa_available (07FF79F1630A0h)],2 <--- begin | |
00007FF79E253260 jl node::Buffer::Swap32+1A3h (07FF79E2532A3h) | |
for (size_t i = 0; i < len32; i++) { | |
00007FF79E253262 mov rax,rsi | |
00007FF79E253265 mov rdx,rsi | |
00007FF79E253268 and eax,7 | |
00007FF79E25326B sub rdx,rax | |
00007FF79E25326E mov rax,r14 | |
data32[i] = BSWAP_INTRINSIC_4(data32[i]); | |
00007FF79E253271 movdqu xmm0,xmmword ptr [rax] | |
00007FF79E253275 add rcx,8 | |
00007FF79E253279 lea rax,[rax+20h] | |
00007FF79E25327D pshufb xmm0,xmmword ptr [__xmm@0c0d0e0f08090a0b0405060700010203 (07FF79F0A1750h)] | |
00007FF79E253286 movdqu xmmword ptr [rax-20h],xmm0 | |
00007FF79E25328B movdqu xmm0,xmmword ptr [rax-10h] | |
00007FF79E253290 pshufb xmm0,xmmword ptr [__xmm@0c0d0e0f08090a0b0405060700010203 (07FF79F0A1750h)] | |
00007FF79E253299 movdqu xmmword ptr [rax-10h],xmm0 | |
00007FF79E25329E cmp rcx,rdx | |
00007FF79E2532A1 jb node::Buffer::Swap32+171h (07FF79E253271h) <--- begin | |
for (size_t i = 0; i < len32; i++) { | |
00007FF79E2532A3 cmp rcx,rsi | |
00007FF79E2532A6 jae node::Buffer::Swap32+1C2h (07FF79E2532C2h) | |
00007FF79E2532A8 nop dword ptr [rax+rax] | |
data32[i] = BSWAP_INTRINSIC_4(data32[i]); | |
00007FF79E2532B0 mov eax,dword ptr [r14+rcx*4] | |
00007FF79E2532B4 bswap eax | |
00007FF79E2532B6 mov dword ptr [r14+rcx*4],eax | |
00007FF79E2532BA inc rcx | |
00007FF79E2532BD cmp rcx,rsi | |
00007FF79E2532C0 jb node::Buffer::Swap32+1B0h (07FF79E2532B0h) | |
} | |
} else { | |
for (size_t i = 0; i < ts_obj_length; i += 4) { | |
std::swap(ts_obj_data[i], ts_obj_data[i + 3]); | |
std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]); | |
} | |
} | |
args.GetReturnValue().Set(args[0]); | |
00007FF79E2532C2 cmp dword ptr [rdi+10h],0 | |
00007FF79E2532C6 mov r14,qword ptr [rsp+20h] | |
00007FF79E2532CB mov rsi,qword ptr [rsp+50h] | |
00007FF79E2532D0 jg node::Buffer::Swap32+1DFh (07FF79E2532DFh) | |
00007FF79E2532D2 mov rcx,qword ptr [rdi] | |
00007FF79E2532D5 mov rcx,qword ptr [rcx+8] | |
00007FF79E2532D9 add rcx,60h | |
00007FF79E2532DD jmp node::Buffer::Swap32+1E3h (07FF79E2532E3h) | |
00007FF79E2532DF mov rcx,qword ptr [rdi+8] | |
00007FF79E2532E3 mov rax,qword ptr [rdi] | |
00007FF79E2532E6 test rcx,rcx | |
00007FF79E2532E9 jne node::Buffer::Swap32+1FAh (07FF79E2532FAh) | |
00007FF79E2532EB mov rcx,qword ptr [rax+10h] | |
00007FF79E2532EF mov qword ptr [rax+18h],rcx | |
} | |
00007FF79E2532F3 add rsp,28h | |
00007FF79E2532F7 pop rdi | |
00007FF79E2532F8 pop rbx | |
00007FF79E2532F9 ret | |
} | |
} else { | |
for (size_t i = 0; i < ts_obj_length; i += 4) { | |
std::swap(ts_obj_data[i], ts_obj_data[i + 3]); | |
std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]); | |
} | |
} | |
args.GetReturnValue().Set(args[0]); | |
00007FF79E2532FA mov rdx,qword ptr [rcx] | |
00007FF79E2532FD mov qword ptr [rax+18h],rdx | |
} | |
00007FF79E253301 add rsp,28h | |
00007FF79E253305 pop rdi | |
00007FF79E253306 pop rbx | |
00007FF79E253307 ret | |
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | |
00007FF79E253308 mov rcx,qword ptr [rbx] | |
00007FF79E25330B lea rdx,[string "argument should be a Buffer" (07FF79EFB4A00h)] | |
} | |
00007FF79E253312 add rsp,28h | |
00007FF79E253316 pop rdi | |
00007FF79E253317 pop rbx | |
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | |
00007FF79E253318 jmp node::Environment::ThrowTypeError (07FF79E22D480h) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void Swap32(const FunctionCallbackInfo<Value>& args) { | |
Environment* env = Environment::GetCurrent(args); | |
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]); | |
SPREAD_ARG(args[0], ts_obj); | |
CHECK_EQ(ts_obj_length % 4, 0); | |
int align = reinterpret_cast<uintptr_t>(ts_obj_data) % sizeof(uint32_t); | |
align = 0; // This is enough to achieve the perf gains | |
if (align == 0) { | |
uint32_t* data32 = reinterpret_cast<uint32_t*>(ts_obj_data); | |
size_t len32 = ts_obj_length / 4; | |
for (size_t i = 0; i < len32; i++) { | |
data32[i] = BSWAP_INTRINSIC_4(data32[i]); | |
} | |
} else { | |
for (size_t i = 0; i < ts_obj_length; i += 4) { | |
std::swap(ts_obj_data[i], ts_obj_data[i + 3]); | |
std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]); | |
} | |
} | |
args.GetReturnValue().Set(args[0]); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment