Skip to content

Instantly share code, notes, and snippets.

@madmann91
Created March 8, 2018 10:44
Show Gist options
  • Save madmann91/adedc899db0897e19bad6c0aa722b4da to your computer and use it in GitHub Desktop.
Save madmann91/adedc899db0897e19bad6c0aa722b4da to your computer and use it in GitHub Desktop.
Fast division using precomputed multiplicative inverse
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#define PAGE_SIZE 4096
typedef struct asmbuf_s asmbuf_t;
struct asmbuf_s {
uint8_t code[PAGE_SIZE - sizeof(uint64_t)];
uint64_t count;
};
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <sys/mman.h>
asmbuf_t* asmbuf_create(void) {
return mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
}
void asmbuf_finalize(asmbuf_t* buf) {
mprotect(buf, sizeof(asmbuf_t), PROT_READ | PROT_EXEC);
}
void asmbuf_free(asmbuf_t* buf) {
munmap(buf, PAGE_SIZE);
}
#elif defined(WIN32)
#include <windows.h>
asmbuf_t* asmbuf_create(void) {
return VirtualAlloc(NULL, PAGE_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
}
void asmbuf_finalize(asmbuf_t* buf) {
DWORD old;
VirtualProtect(buf, sizeof(*buf), PAGE_EXECUTE_READ, &old);
}
void asmbuf_free(asmbuf_t* buf) {
VirtualFree(buf, 0, MEM_RELEASE);
}
#else
#error "Cannot allocate asm buffer"
#endif
typedef struct fastdiv_s fastdiv_t;
struct fastdiv_s {
uint32_t m, s1, s2;
};
fastdiv_t fastdiv_init(uint32_t div) {
uint32_t l = 1;
while ((1 << l) < div) ++l;
uint64_t max = 1ull << 32ull;
uint32_t m = (max << l) / div - max + 1;
uint32_t s1 = l < 1 ? l : 1;
uint32_t s2 = l > 1 ? l - 1 : 0;
return (fastdiv_t) { .m = m, .s1 = s1, .s2 = s2 };
}
uint32_t fastdiv(fastdiv_t fd, uint32_t i) {
uint32_t t = ((uint64_t)fd.m * (uint64_t)i) >> 32ull;
return (t + ((i - t) >> fd.s1)) >> fd.s2;
}
asmbuf_t* asmbuf_gen_fastdiv(uint32_t div) {
asmbuf_t* buf = asmbuf_create();
// power of 2
if ((div & (div - 1)) == 0) {
uint32_t s = 0;
while ((div & 1) == 0) s++, div >>= 1;
// mov %edi, %eax
buf->code[buf->count++] = 0x89;
buf->code[buf->count++] = 0xf8;
// shr imm8, %eax
buf->code[buf->count++] = 0xc1;
buf->code[buf->count++] = 0xe8;
buf->code[buf->count++] = s;
// retq
buf->code[buf->count++] = 0xc3;
} else {
fastdiv_t fd = fastdiv_init(div);
// mov %edi, %eax
buf->code[buf->count++] = 0x89;
buf->code[buf->count++] = 0xf8;
// mov imm32, %edx
buf->code[buf->count++] = 0xba;
buf->code[buf->count++] = fd.m;
buf->code[buf->count++] = fd.m >> 8;
buf->code[buf->count++] = fd.m >> 16;
buf->code[buf->count++] = fd.m >> 24;
// mul %edx
buf->code[buf->count++] = 0xf7;
buf->code[buf->count++] = 0xe2;
// mov %edi, %eax
buf->code[buf->count++] = 0x89;
buf->code[buf->count++] = 0xf8;
// sub %edx, %eax
buf->code[buf->count++] = 0x29;
buf->code[buf->count++] = 0xd0;
if (fd.s1 > 0) {
// shr imm8, %eax
buf->code[buf->count++] = 0xc1;
buf->code[buf->count++] = 0xe8;
buf->code[buf->count++] = fd.s1;
}
// add %edx, %eax
buf->code[buf->count++] = 0x01;
buf->code[buf->count++] = 0xd0;
// shr imm8, %eax
if (fd.s2 > 0) {
// shr imm8, %eax
buf->code[buf->count++] = 0xc1;
buf->code[buf->count++] = 0xe8;
buf->code[buf->count++] = fd.s2;
}
// retq
buf->code[buf->count++] = 0xc3;
}
asmbuf_finalize(buf);
return buf;
}
int main(int argc, char** argv) {
if (argc < 2)
return 1;
uint32_t n = strtol(argv[1], NULL, 10);
clock_t start, end;
uint32_t sum;
asmbuf_t* buf = asmbuf_gen_fastdiv(n);
uint32_t (*div)(uint32_t) = (void*)buf->code;
start = clock();
sum = 0;
for (uint32_t i = 0; i != 0xFFFFFFFF; ++i)
sum += div(i);
end = clock();
asmbuf_free(buf);
printf("jit: %d - %d ms\n", sum, (end - start) * 1000 / CLOCKS_PER_SEC);
fastdiv_t fd = fastdiv_init(n);
start = clock();
sum = 0;
for (uint32_t i = 0; i != 0xFFFFFFFF; ++i)
sum += fastdiv(fd, i);
end = clock();
printf("fastdiv: %d - %d ms\n", sum, (end - start) * 1000 / CLOCKS_PER_SEC);
start = clock();
sum = 0;
for (uint32_t i = 0; i != 0xFFFFFFFF; ++i)
sum += i / n;
end = clock();
printf("ref: %d - %d ms\n", sum, (end - start) * 1000 / CLOCKS_PER_SEC);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment