// Last active June 12, 2022 22:39
// Save mrvn/395da7820304a61d8d80f7eff98cc9e4 to your computer and use it in GitHub Desktop.
// x86_64 adcx/adox
// This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
#include <x86intrin.h>

#include <array>
#include <cinttypes>
#include <cstdint>
#include <utility>
/// Fixed-width multi-limb unsigned integer: four 64-bit limbs (256 bits).
/// sum() and sum2() start the carry chain at the highest index, i.e. they
/// treat element size()-1 as the least significant limb.
using Big = std::array<uint64_t, 4>;
/*
 * Intrinsic-based variant of sum(), left disabled; the inline-asm version
 * that follows is the one actually compiled.
 *
unsigned char sum(Big & __restrict__ a, const Big & __restrict__ b) {
    unsigned char c = 0;
    for (std::size_t i = a.size(); i > 0; --i) {
        c = _addcarry_u64(c, a[i - 1], b[i - 1], (unsigned long long*)&a[i - 1]);
    }
    return c;
}
*/
/// Adds `b` into `a` in place, limb by limb with carry propagation.
///
/// The chain starts at the highest index and walks down to index 0, so
/// element size()-1 is the least significant limb.
///
/// @param a  Accumulator; overwritten with the low 256 bits of a + b.
/// @param b  Addend; only read. Must not alias `a` (both are __restrict__).
/// @return   The carry out of the final (index 0) addition: 0 or 1.
unsigned char sum(Big & __restrict__ a, const Big & __restrict__ b) {
    // `loop` decrements rcx before testing it, so a zero count would wrap.
    static_assert(std::tuple_size<Big>::value > 0, "Big must have at least one limb");

    unsigned char carry;
    uint64_t limb;
    uint64_t remaining = a.size();
    asm("    xor  %[carry], %[carry]            \n"  // xor clears CF for the first adc
        "1:                                     \n"
        "    mov  -8(%[pa], %[n], 8), %[limb]   \n"  // limb = a[n - 1]
        "    adc  -8(%[pb], %[n], 8), %[limb]   \n"  // limb += b[n - 1] + CF
        "    mov  %[limb], -8(%[pa], %[n], 8)   \n"  // a[n - 1] = limb (mov keeps flags)
        "    loop 1b                            \n"  // --rcx, repeat while non-zero; flags untouched
        "    setc %[carry]"
        : [limb] "=&r" (limb), [carry] "=&r" (carry),
          [n] "+c" (remaining),  // must live in rcx: `loop` uses it implicitly
          "+m" (a)               // the whole array is read and written through pa
        : [pa] "r" (a.data()), [pb] "r" (b.data()),
          "m" (b)                // the whole array is read through pb
        : "cc"
    );
    return carry;
}
/// Performs two independent multi-limb additions, `a += b` and `c += d`,
/// interleaved in a single loop.
///
/// The first chain uses `adcx` (carry kept in CF) and the second uses `adox`
/// (carry kept in OF), so the two carries never interfere. Both instructions
/// belong to the ADX extension, so the CPU must support it. As in sum(), the
/// chain starts at the highest index and walks down to index 0.
///
/// @param a  First accumulator; overwritten with the low 256 bits of a + b.
/// @param b  First addend; only read.
/// @param c  Second accumulator; overwritten with the low 256 bits of c + d.
/// @param d  Second addend; only read.
/// @return   {carry out of a + b, carry out of c + d}, each 0 or 1.
///
/// All four arguments are __restrict__ and therefore must not alias.
std::pair<unsigned char, unsigned char> sum2(Big & __restrict__ a, const Big & __restrict__ b, Big & __restrict__ c, const Big & __restrict__ d) {
    // `loop` decrements rcx before testing it, so a zero count would wrap.
    static_assert(std::tuple_size<Big>::value > 0, "Big must have at least one limb");

    unsigned char carry_ab;
    unsigned char carry_cd;
    uint64_t limb;
    uint64_t remaining = a.size();
    asm("    xor  %[cf], %[cf]                  \n"  // one xor clears both CF and OF
        "1:                                     \n"
        "    mov  -8(%[pa], %[n], 8), %[limb]   \n"  // limb = a[n - 1]
        "    adcx -8(%[pb], %[n], 8), %[limb]   \n"  // limb += b[n - 1] + CF (OF untouched)
        "    mov  %[limb], -8(%[pa], %[n], 8)   \n"  // a[n - 1] = limb
        "    mov  -8(%[pc], %[n], 8), %[limb]   \n"  // limb = c[n - 1]
        "    adox -8(%[pd], %[n], 8), %[limb]   \n"  // limb += d[n - 1] + OF (CF untouched)
        "    mov  %[limb], -8(%[pc], %[n], 8)   \n"  // c[n - 1] = limb
        "    loop 1b                            \n"  // --rcx, repeat while non-zero; flags untouched
        "    setc %[cf]                         \n"
        "    seto %[of]"
        : [limb] "=&r" (limb), [cf] "=&r" (carry_ab), [of] "=&r" (carry_cd),
          [n] "+c" (remaining),  // must live in rcx: `loop` uses it implicitly
          "+m" (a), "+m" (c)     // whole arrays are read and written through pa / pc
        : [pa] "r" (a.data()), [pb] "r" (b.data()),
          [pc] "r" (c.data()), [pd] "r" (d.data()),
          "m" (b), "m" (d)       // whole arrays are read through pb / pd
        : "cc"
    );
    return std::make_pair(carry_ab, carry_cd);
}
#include <cstdio> | |
int main() { | |
Big a{~0LLU,~0LLU,~0LLU,~0LLU}; | |
Big b{2,3,4,6}; | |
Big c{~0LLU,~0LLU,~0LLU,~0LLU}; | |
Big d{4,6,8,12}; | |
// unsigned char c0 = sum(a, b); | |
// unsigned char c1 = sum(c, d); | |
auto [c0, c1] = sum2(a, b, c, d); | |
//auto [c2, c3] = sum2(a, b, c, d); | |
printf("%x ", c0); | |
for(std::size_t i = 0; i < a.size(); ++i) { | |
printf("%016lx ", a[i]); | |
} | |
printf("\n"); | |
printf("%x ", c1); | |
for(std::size_t i = 0; i < c.size(); ++i) { | |
printf("%016lx ", c[i]); | |
} | |
printf("\n"); | |
} |
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment