Skip to content

Instantly share code, notes, and snippets.

@zbjornson
Created June 20, 2016 17:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zbjornson/0587e03a37fe503d05901f8b728d0ff2 to your computer and use it in GitHub Desktop.
Save zbjornson/0587e03a37fe503d05901f8b728d0ff2 to your computer and use it in GitHub Desktop.
swap32 without alignment check
void Swap32(const FunctionCallbackInfo<Value>& args) {
00007FF79E253100 push rbx
00007FF79E253102 push rdi
00007FF79E253103 sub rsp,28h
00007FF79E253107 mov rdi,rcx
Environment* env = Environment::GetCurrent(args);
00007FF79E25310A call node::Environment::GetCurrent (07FF79E22D240h)
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
00007FF79E25310F cmp dword ptr [rdi+10h],0
Environment* env = Environment::GetCurrent(args);
00007FF79E253113 mov rbx,rax
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
00007FF79E253116 jg node::Buffer::Swap32+25h (07FF79E253125h)
00007FF79E253118 mov rdx,qword ptr [rdi]
00007FF79E25311B mov rcx,qword ptr [rdx+8]
00007FF79E25311F add rcx,60h
00007FF79E253123 jmp node::Buffer::Swap32+29h (07FF79E253129h)
00007FF79E253125 mov rcx,qword ptr [rdi+8]
00007FF79E253129 mov rdx,qword ptr [rcx]
00007FF79E25312C movzx eax,dl
00007FF79E25312F and al,3
00007FF79E253131 cmp al,1
00007FF79E253133 jne node::Buffer::Swap32+208h (07FF79E253308h)
00007FF79E253139 mov rax,qword ptr [rdx-1]
00007FF79E25313D cmp byte ptr [rax+0Bh],0BDh
00007FF79E253141 jne node::Buffer::Swap32+208h (07FF79E253308h)
00007FF79E253147 mov rcx,qword ptr [rcx]
00007FF79E25314A call v8::internal::JSTypedArray::type (07FF79E62E340h)
00007FF79E25314F cmp eax,2
00007FF79E253152 jne node::Buffer::Swap32+208h (07FF79E253308h)
SPREAD_ARG(args[0], ts_obj);
00007FF79E253158 cmp dword ptr [rdi+10h],0
00007FF79E25315C mov qword ptr [rsp+48h],rbp
00007FF79E253161 mov qword ptr [rsp+50h],rsi
00007FF79E253166 mov qword ptr [rsp+20h],r14
00007FF79E25316B jg node::Buffer::Swap32+7Ah (07FF79E25317Ah)
00007FF79E25316D mov rax,qword ptr [rdi]
00007FF79E253170 mov rcx,qword ptr [rax+8]
00007FF79E253174 add rcx,60h
00007FF79E253178 jmp node::Buffer::Swap32+7Eh (07FF79E25317Eh)
00007FF79E25317A mov rcx,qword ptr [rdi+8]
00007FF79E25317E mov rdx,qword ptr [rcx]
00007FF79E253181 movzx eax,dl
00007FF79E253184 and al,3
00007FF79E253186 cmp al,1
00007FF79E253188 jne node::Buffer::Swap32+0A1h (07FF79E2531A1h)
00007FF79E25318A mov rax,qword ptr [rdx-1]
00007FF79E25318E cmp byte ptr [rax+0Bh],0BDh
00007FF79E253192 jne node::Buffer::Swap32+0A1h (07FF79E2531A1h)
00007FF79E253194 mov rcx,qword ptr [rcx]
00007FF79E253197 call v8::internal::JSTypedArray::type (07FF79E62E340h)
00007FF79E25319C cmp eax,2
00007FF79E25319F je node::Buffer::Swap32+0BAh (07FF79E2531BAh)
00007FF79E2531A1 mov r8d,4D1h
00007FF79E2531A7 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)]
00007FF79E2531AE lea rcx,[string L"(args[0])->IsUint8Ar"... (07FF79EFB4B10h)]
00007FF79E2531B5 call _wassert (07FF79EB14464h)
00007FF79E2531BA cmp dword ptr [rdi+10h],0
00007FF79E2531BE jg node::Buffer::Swap32+0CDh (07FF79E2531CDh)
00007FF79E2531C0 mov rax,qword ptr [rdi]
00007FF79E2531C3 mov rbp,qword ptr [rax+8]
00007FF79E2531C7 add rbp,60h
00007FF79E2531CB jmp node::Buffer::Swap32+0D1h (07FF79E2531D1h)
00007FF79E2531CD mov rbp,qword ptr [rdi+8]
00007FF79E2531D1 lea rdx,[rsp+40h]
00007FF79E2531D6 mov rcx,rbp
00007FF79E2531D9 call v8::ArrayBufferView::Buffer (07FF79E4D1220h)
00007FF79E2531DE mov rcx,rbp
00007FF79E2531E1 mov rbx,rax
00007FF79E2531E4 call v8::ArrayBufferView::ByteLength (07FF79E4D15D0h)
00007FF79E2531E9 mov rcx,rbp
00007FF79E2531EC mov rsi,rax
00007FF79E2531EF call v8::ArrayBufferView::ByteOffset (07FF79E4D1560h)
00007FF79E2531F4 mov rcx,qword ptr [rbx]
00007FF79E2531F7 mov r14,rax
00007FF79E2531FA mov rbp,qword ptr [rsp+48h]
00007FF79E2531FF mov rdx,qword ptr [rcx]
00007FF79E253202 add r14,qword ptr [rdx+1Fh]
00007FF79E253206 test rsi,rsi
00007FF79E253209 je node::Buffer::Swap32+129h (07FF79E253229h)
00007FF79E25320B test r14,r14
00007FF79E25320E jne node::Buffer::Swap32+129h (07FF79E253229h)
00007FF79E253210 mov r8d,4D1h
00007FF79E253216 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)]
00007FF79E25321D lea rcx,[string L"(ts_obj_data) != (nu"... (07FF79EFB4A60h)]
00007FF79E253224 call _wassert (07FF79EB14464h)
CHECK_EQ(ts_obj_length % 4, 0);
00007FF79E253229 test sil,3
00007FF79E25322D je node::Buffer::Swap32+148h (07FF79E253248h)
00007FF79E25322F mov r8d,4D3h
00007FF79E253235 lea rdx,[string L"src\\node_buffer.cc" (07FF79EFB47B0h)]
00007FF79E25323C lea rcx,[string L"(ts_obj_length % 4) "... (07FF79EFB4EB0h)]
00007FF79E253243 call _wassert (07FF79EB14464h)
int align = reinterpret_cast<uintptr_t>(ts_obj_data) % sizeof(uint32_t);
align = 0;
if (align == 0) {
uint32_t* data32 = reinterpret_cast<uint32_t*>(ts_obj_data);
size_t len32 = ts_obj_length / 4;
00007FF79E253248 shr rsi,2
for (size_t i = 0; i < len32; i++) {
00007FF79E25324C xor ecx,ecx
00007FF79E25324E test rsi,rsi
00007FF79E253251 je node::Buffer::Swap32+1C2h (07FF79E2532C2h)
00007FF79E253253 cmp rsi,8
00007FF79E253257 jb node::Buffer::Swap32+1A3h (07FF79E2532A3h)
data32[i] = BSWAP_INTRINSIC_4(data32[i]);
00007FF79E253259 cmp dword ptr [__isa_available (07FF79F1630A0h)],2 <--- begin
00007FF79E253260 jl node::Buffer::Swap32+1A3h (07FF79E2532A3h)
for (size_t i = 0; i < len32; i++) {
00007FF79E253262 mov rax,rsi
00007FF79E253265 mov rdx,rsi
00007FF79E253268 and eax,7
00007FF79E25326B sub rdx,rax
00007FF79E25326E mov rax,r14
data32[i] = BSWAP_INTRINSIC_4(data32[i]);
00007FF79E253271 movdqu xmm0,xmmword ptr [rax]
00007FF79E253275 add rcx,8
00007FF79E253279 lea rax,[rax+20h]
00007FF79E25327D pshufb xmm0,xmmword ptr [__xmm@0c0d0e0f08090a0b0405060700010203 (07FF79F0A1750h)]
00007FF79E253286 movdqu xmmword ptr [rax-20h],xmm0
00007FF79E25328B movdqu xmm0,xmmword ptr [rax-10h]
00007FF79E253290 pshufb xmm0,xmmword ptr [__xmm@0c0d0e0f08090a0b0405060700010203 (07FF79F0A1750h)]
00007FF79E253299 movdqu xmmword ptr [rax-10h],xmm0
00007FF79E25329E cmp rcx,rdx
00007FF79E2532A1 jb node::Buffer::Swap32+171h (07FF79E253271h) <--- end
for (size_t i = 0; i < len32; i++) {
00007FF79E2532A3 cmp rcx,rsi
00007FF79E2532A6 jae node::Buffer::Swap32+1C2h (07FF79E2532C2h)
00007FF79E2532A8 nop dword ptr [rax+rax]
data32[i] = BSWAP_INTRINSIC_4(data32[i]);
00007FF79E2532B0 mov eax,dword ptr [r14+rcx*4]
00007FF79E2532B4 bswap eax
00007FF79E2532B6 mov dword ptr [r14+rcx*4],eax
00007FF79E2532BA inc rcx
00007FF79E2532BD cmp rcx,rsi
00007FF79E2532C0 jb node::Buffer::Swap32+1B0h (07FF79E2532B0h)
}
} else {
for (size_t i = 0; i < ts_obj_length; i += 4) {
std::swap(ts_obj_data[i], ts_obj_data[i + 3]);
std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]);
}
}
args.GetReturnValue().Set(args[0]);
00007FF79E2532C2 cmp dword ptr [rdi+10h],0
00007FF79E2532C6 mov r14,qword ptr [rsp+20h]
00007FF79E2532CB mov rsi,qword ptr [rsp+50h]
00007FF79E2532D0 jg node::Buffer::Swap32+1DFh (07FF79E2532DFh)
00007FF79E2532D2 mov rcx,qword ptr [rdi]
00007FF79E2532D5 mov rcx,qword ptr [rcx+8]
00007FF79E2532D9 add rcx,60h
00007FF79E2532DD jmp node::Buffer::Swap32+1E3h (07FF79E2532E3h)
00007FF79E2532DF mov rcx,qword ptr [rdi+8]
00007FF79E2532E3 mov rax,qword ptr [rdi]
00007FF79E2532E6 test rcx,rcx
00007FF79E2532E9 jne node::Buffer::Swap32+1FAh (07FF79E2532FAh)
00007FF79E2532EB mov rcx,qword ptr [rax+10h]
00007FF79E2532EF mov qword ptr [rax+18h],rcx
}
00007FF79E2532F3 add rsp,28h
00007FF79E2532F7 pop rdi
00007FF79E2532F8 pop rbx
00007FF79E2532F9 ret
}
} else {
for (size_t i = 0; i < ts_obj_length; i += 4) {
std::swap(ts_obj_data[i], ts_obj_data[i + 3]);
std::swap(ts_obj_data[i + 1], ts_obj_data[i + 2]);
}
}
args.GetReturnValue().Set(args[0]);
00007FF79E2532FA mov rdx,qword ptr [rcx]
00007FF79E2532FD mov qword ptr [rax+18h],rdx
}
00007FF79E253301 add rsp,28h
00007FF79E253305 pop rdi
00007FF79E253306 pop rbx
00007FF79E253307 ret
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
00007FF79E253308 mov rcx,qword ptr [rbx]
00007FF79E25330B lea rdx,[string "argument should be a Buffer" (07FF79EFB4A00h)]
}
00007FF79E253312 add rsp,28h
00007FF79E253316 pop rdi
00007FF79E253317 pop rbx
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
00007FF79E253318 jmp node::Environment::ThrowTypeError (07FF79E22D480h)
// Reverses the byte order of every 32-bit word of the Buffer passed as
// args[0], in place, and sets that same Buffer as the return value.
//
// Returns early through THROW_AND_RETURN_UNLESS_BUFFER when args[0] is not a
// Buffer, and CHECK-fails when the byte length is not a multiple of 4.
//
// This is the "no alignment check" variant: the word-wise loop is used
// unconditionally. The previous revision computed the alignment of
// ts_obj_data, immediately overwrote it with 0 and then branched on it, which
// left a dead store and an unreachable byte-wise fallback; both are removed
// here without changing what executes. Skipping the alignment check is enough
// to achieve the perf gains.
//
// NOTE(review): ts_obj_data is not guaranteed to be 4-byte aligned (a Buffer
// may be a view at an arbitrary byte offset), and reading it through a
// uint32_t* is then undefined behavior in ISO C++. Confirm this variant is
// only used for benchmarking on targets that tolerate unaligned 32-bit
// accesses before shipping it.
void Swap32(const FunctionCallbackInfo<Value>& args) {
  Environment* env = Environment::GetCurrent(args);
  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
  // Declares ts_obj_data / ts_obj_length for the Buffer's bytes.
  SPREAD_ARG(args[0], ts_obj);
  CHECK_EQ(ts_obj_length % 4, 0);

  // Treat the bytes as a sequence of 32-bit words and byte-swap each one.
  uint32_t* data32 = reinterpret_cast<uint32_t*>(ts_obj_data);
  const size_t len32 = ts_obj_length / 4;
  for (size_t i = 0; i < len32; i++) {
    data32[i] = BSWAP_INTRINSIC_4(data32[i]);
  }

  args.GetReturnValue().Set(args[0]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment