Skip to content

Instantly share code, notes, and snippets.

@prehistoricpenguin
Last active December 10, 2015 01:48
Show Gist options
  • Save prehistoricpenguin/4362213 to your computer and use it in GitHub Desktop.
Save prehistoricpenguin/4362213 to your computer and use it in GitHub Desktop.
bit-couting (8bits integer)
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
int totalRec = 1e8;
const int tbl[] =
{
#define B2(n) n, n+1, n+1, n+2
#define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
B6(0), B6(1), B6(1), B6(2)
};
int naiveCount (int val)
{
char cnt = 0;
while (val)
{
cnt += (val & 1);
val = val >> 1;
}
return cnt;
}
inline int tableLookUp(int val)
{
assert(val >= 0 && val <= 255);
return tbl[val];
}
int asmCount(int val)
{
int res = 0;
asm volatile("xor %0, %0\n\t"
"begin:\n\t"
"cmp $0x0, %1\n\t"
"jle end\n\t"
"movl %1, %%ecx\n\t"
"and $0x1, %%ecx\n\t"
"addl %%ecx, %0\n\t"
"shrl %1\n\t"
"jmp begin\n\t"
"end:"
: "=r"(res)
: "r" (val));
return res;
}
int asmCountSecond(int val)
{
int ans=0;
asm volatile("popcntl %0,%1"
:"=r"(ans)
:"r"(ans),"r"(val));
return ans;
}
int asmCountThird(int val)
{
int res = 0;
asm volatile("begin1:\n\t"
"movl %1, %%ecx\n\t"
"and $0x1, %%ecx\n\t"
"addl %%ecx, %0\n\t"
"shrl %1\n\t"
"jnz begin1\n\t"
"end1:"
: "=r"(res)
: "r" (val)
: "ecx"); // Important: clobbers ecx!
return res;
}
void printTable()
{
int i;
for (i = 0; i < 255; ++i)
printf("%d%s", tbl[i], (i + 1) % 4 ? " " : "\n");
}
void checkTable()
{
int totErr = 0,
i = 0;
for (i = 0; i < 255; ++i)
totErr += (tbl[i] != naiveCount(i));
assert (totErr == 0);
}
void checkAsm()
{
int totErr = 0,
i = 0;
for (i = 0; i < 255; ++i)
{
totErr += (tbl[i] != asmCountThird(i));
#if defined DEBUG
printf("%d %d\n",tbl[i], asmCountThird(i));
}
assert(totErr == 0);
#else
}
#endif
}
void checkFunction()
{
checkTable();
checkAsm();
}
#define ull unsigned long long
ull get_clock()
{
ull ret;
asm volatile("rdtsc\n\t":"=A"(ret));
return ret;
}
int main()
{
const int totTimes = 1e7;
int i;
int val;
checkFunction();
printf("every function run %d times\n",totTimes);
ull time0 = get_clock(),
time1;
for (i = 0;i< totTimes; ++i)
{
val = i% 256;
naiveCount(val);
}
time1 = get_clock();
printf("naive, clock used :%lld\n",time1 - time0);
time0 = get_clock();
for (i = 0; i < totTimes; ++i)
{
val = i % 256;
tableLookUp(val);
}
time1 = get_clock();
printf("table,clock used :%lld\n",time1 - time0);
time0 = get_clock();
for (i = 0; i < totTimes; ++i)
{
val = i % 256;
asmCount(val);
}
time1 = get_clock();
printf("asm,clock used :%lld\n",time1 - time0);
time0 = get_clock();
for (i = 0; i < totTimes; ++i)
{
val = i% 256;
asmCountSecond(val);
}
time1 = get_clock();
printf("pop_cnt,clock used :%lld\n",time1 - time0);
time0 = get_clock();
for (i = 0; i < totTimes; ++i)
{
val = i% 256;
asmCountThird(val);
}
time1 = get_clock();
printf("asm(o),clock used :%lld\n",time1 - time0);
return 0;
}
/*
every function run 10000000 times
naive, clock used :602703867
table,clock used :142733689
asm,clock used :341840472
pop_cnt,clock used :125105241
asm(o),clock used :315558483
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment