Last active
December 10, 2015 01:48
-
-
Save prehistoricpenguin/4362213 to your computer and use it in GitHub Desktop.
bit-couting (8bits integer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <time.h> | |
#include <assert.h> | |
int totalRec = 1e8; | |
const int tbl[] = | |
{ | |
#define B2(n) n, n+1, n+1, n+2 | |
#define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) | |
#define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) | |
B6(0), B6(1), B6(1), B6(2) | |
}; | |
int naiveCount (int val) | |
{ | |
char cnt = 0; | |
while (val) | |
{ | |
cnt += (val & 1); | |
val = val >> 1; | |
} | |
return cnt; | |
} | |
inline int tableLookUp(int val) | |
{ | |
assert(val >= 0 && val <= 255); | |
return tbl[val]; | |
} | |
int asmCount(int val) | |
{ | |
int res = 0; | |
asm volatile("xor %0, %0\n\t" | |
"begin:\n\t" | |
"cmp $0x0, %1\n\t" | |
"jle end\n\t" | |
"movl %1, %%ecx\n\t" | |
"and $0x1, %%ecx\n\t" | |
"addl %%ecx, %0\n\t" | |
"shrl %1\n\t" | |
"jmp begin\n\t" | |
"end:" | |
: "=r"(res) | |
: "r" (val)); | |
return res; | |
} | |
int asmCountSecond(int val) | |
{ | |
int ans=0; | |
asm volatile("popcntl %0,%1" | |
:"=r"(ans) | |
:"r"(ans),"r"(val)); | |
return ans; | |
} | |
int asmCountThird(int val) | |
{ | |
int res = 0; | |
asm volatile("begin1:\n\t" | |
"movl %1, %%ecx\n\t" | |
"and $0x1, %%ecx\n\t" | |
"addl %%ecx, %0\n\t" | |
"shrl %1\n\t" | |
"jnz begin1\n\t" | |
"end1:" | |
: "=r"(res) | |
: "r" (val) | |
: "ecx"); // Important: clobbers ecx! | |
return res; | |
} | |
void printTable() | |
{ | |
int i; | |
for (i = 0; i < 255; ++i) | |
printf("%d%s", tbl[i], (i + 1) % 4 ? " " : "\n"); | |
} | |
void checkTable() | |
{ | |
int totErr = 0, | |
i = 0; | |
for (i = 0; i < 255; ++i) | |
totErr += (tbl[i] != naiveCount(i)); | |
assert (totErr == 0); | |
} | |
void checkAsm() | |
{ | |
int totErr = 0, | |
i = 0; | |
for (i = 0; i < 255; ++i) | |
{ | |
totErr += (tbl[i] != asmCountThird(i)); | |
#if defined DEBUG | |
printf("%d %d\n",tbl[i], asmCountThird(i)); | |
} | |
assert(totErr == 0); | |
#else | |
} | |
#endif | |
} | |
void checkFunction() | |
{ | |
checkTable(); | |
checkAsm(); | |
} | |
#define ull unsigned long long | |
ull get_clock() | |
{ | |
ull ret; | |
asm volatile("rdtsc\n\t":"=A"(ret)); | |
return ret; | |
} | |
int main() | |
{ | |
const int totTimes = 1e7; | |
int i; | |
int val; | |
checkFunction(); | |
printf("every function run %d times\n",totTimes); | |
ull time0 = get_clock(), | |
time1; | |
for (i = 0;i< totTimes; ++i) | |
{ | |
val = i% 256; | |
naiveCount(val); | |
} | |
time1 = get_clock(); | |
printf("naive, clock used :%lld\n",time1 - time0); | |
time0 = get_clock(); | |
for (i = 0; i < totTimes; ++i) | |
{ | |
val = i % 256; | |
tableLookUp(val); | |
} | |
time1 = get_clock(); | |
printf("table,clock used :%lld\n",time1 - time0); | |
time0 = get_clock(); | |
for (i = 0; i < totTimes; ++i) | |
{ | |
val = i % 256; | |
asmCount(val); | |
} | |
time1 = get_clock(); | |
printf("asm,clock used :%lld\n",time1 - time0); | |
time0 = get_clock(); | |
for (i = 0; i < totTimes; ++i) | |
{ | |
val = i% 256; | |
asmCountSecond(val); | |
} | |
time1 = get_clock(); | |
printf("pop_cnt,clock used :%lld\n",time1 - time0); | |
time0 = get_clock(); | |
for (i = 0; i < totTimes; ++i) | |
{ | |
val = i% 256; | |
asmCountThird(val); | |
} | |
time1 = get_clock(); | |
printf("asm(o),clock used :%lld\n",time1 - time0); | |
return 0; | |
} | |
/* | |
every function run 10000000 times | |
naive, clock used :602703867 | |
table,clock used :142733689 | |
asm,clock used :341840472 | |
pop_cnt,clock used :125105241 | |
asm(o),clock used :315558483 | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment