Skip to content

Instantly share code, notes, and snippets.

@pervognsen

pervognsen/branchless_merge.c Secret

Last active Jul 17, 2021
Embed
What would you like to do?
// Branchless merge (7 cycles/elem on Zen 3)
void merge1(u32 *dest, const u32 *src1, const u32 *end1, const u32 *src2, const u32 *end2) {
for (;;) {
if (unlikely(src1 == end1)) {
memcpy(dest, src2, (end2 - src2) * sizeof(u32));
return;
}
if (unlikely(src2 == end2)) {
memcpy(dest, src1, (end1 - src1) * sizeof(u32));
return;
}
u32 val1 = *src1;
u32 val2 = *src2;
int cond = val1 <= val2;
*dest++ = cond ? val1 : val2;
src1 = cond ? src1 + 1 : src1;
src2 = cond ? src2 : src2 + 1;
}
}
// Bidirectional branchless merge (3.5 cycles/elem on Zen 3)
void merge2(u32 *dest, const u32 *src1, const u32 *end1, const u32 *src2, const u32 *end2) {
u32 *end = dest + (end1 - src1) + (end2 - src2);
for (;;) {
if (unlikely(src1 + 1 >= end1 || src2 + 1 >= end2))
return merge1(dest, src1, end1, src2, end2);
// Front
u32 val1 = *src1;
u32 val2 = *src2;
int cond = val1 <= val2;
*dest++ = cond ? val1 : val2;
src1 = cond ? src1 + 1 : src1;
src2 = cond ? src2 : src2 + 1;
// Back
val1 = end1[-1];
val2 = end2[-1];
cond = val1 > val2;
*--end = cond ? val1 : val2;
end1 = cond ? end1 - 1 : end1;
end2 = cond ? end2 : end2 - 1;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment