Skip to content

Instantly share code, notes, and snippets.

@khayrov
Created December 26, 2010 23:05
Show Gist options
  • Save khayrov/755710 to your computer and use it in GitHub Desktop.
Save khayrov/755710 to your computer and use it in GitHub Desktop.
Micro-optimized (but algorithmically inefficient) prime numbers finder
#!/bin/sh
make clean
make
cat /proc/cpuinfo > report.txt
echo >> report.txt
uname -a >> report.txt
echo >> report.txt
g++ -E -march=native -Wp,-dM prime.cpp | grep SSE >> report.txt
echo >> report.txt
for program in prime_int prime_fp_sse prime_fp_x87 prime_asm ; do
if [ -x ./$program ]; then
echo "Performing sanity check for $program"
checksum=`./$program 5000000 | md5sum | sed -e 's/[^0-9a-f]//g'`
if [ $checksum != 1a0e63b08ad78d461fa05115d8c15abf ]; then
echo "Sanity check failed for $program"
else
echo "Benchmarking $program"
echo $program >> report.txt
for i in `seq 1 5`; do
echo "Iteration $i of 5"
/usr/bin/time ./$program 5000000 > /dev/null 2>> report.txt
done
fi
fi
done
CFLAGS=-O2 -ffast-math -march=native
CXXFLAGS=$(CFLAGS)
ifneq (, $(findstring 64, $(shell uname -m)))
asm_file=prime64.S
x86_64=1
else
asm_file=prime32.S
endif
files=prime.cpp $(asm_file)
targets=prime_int prime_fp_x87 prime_fp_sse prime_asm
all: $(targets)
prime_int: $(files)
$(CXX) $(CXXFLAGS) $^ -o $@
ifdef x86_64
prime_fp_sse: $(files)
$(CXX) $(CXXFLAGS) -DUSE_FP $^ -o $@
prime_fp_x87:
else
prime_fp_sse: $(files)
$(CXX) $(CXXFLAGS) -mfpmath=sse -DUSE_FP $^ -o $@
prime_fp_x87: $(files)
$(CXX) $(CXXFLAGS) -DUSE_FP $^ -o $@
endif
prime_asm: $(files)
$(CXX) $(CXXFLAGS) -DUSE_ASM $^ -o $@
.PHONY: clean
clean:
rm -f $(targets)
#include <iostream>
#include <cstdlib>
#include <cmath>
extern "C" bool is_prime_asm(int n);
static bool is_prime_int(int n)
{
int limit = sqrt(n);
for (int i = 3; i <= limit; i += 2)
if (n % i == 0)
return false;
return true;
}
static bool is_prime_fp(int n)
{
double limit = sqrt(n);
double nf = n;
for (double i = 3.0; i <= limit; i += 2.0)
{
double d = nf / i;
if (d == trunc(d))
return false;
}
return true;
}
#ifdef USE_ASM
#define is_prime is_prime_asm
#elif defined(USE_FP)
#define is_prime is_prime_fp
#else
#define is_prime is_prime_int
#endif
int main(int argc, char **argv)
{
if (argc < 2)
{
std::cerr << "Usage: " << argv[0] << " N\n";
return 1;
}
int maxn = atoi(argv[1]);
if (maxn < 2)
{
return 0;
}
std::cout << 2 << '\n';
for (int n = 3; n <= maxn; n += 2)
if (is_prime(n))
std::cout << n << '\n';
}
.section .rodata
.p2align 4
.LC_div:
.double 3.0, 5.0
.LC_inc:
.double 4.0, 4.0
.text
.global is_prime_asm
.p2align 4
is_prime_asm:
mov 4(%esp), %eax
push %eax
push %eax
cvtdq2pd (%esp), %xmm0
sqrtsd %xmm0, %xmm1
movapd .LC_div, %xmm2
xor %eax, %eax
jmp .L2
.L1:
movapd %xmm0, %xmm3
divpd %xmm2, %xmm3
#ifdef __SSE4_1__
roundpd $0xf, %xmm3, %xmm5
#else
cvtpd2dq %xmm3, %xmm4
cvtdq2pd %xmm4, %xmm5
#endif
cmpeqpd %xmm3, %xmm5
movmskpd %xmm5, %edx
test %edx, %edx
jnz .L_exit
addpd .LC_inc, %xmm2
.L2:
comisd %xmm1, %xmm2
jc .L1
jz .L1
inc %eax
.L_exit:
add $8, %esp
ret
.section .rodata
.p2align 4
.LC_div:
.double 3.0, 5.0
.LC_inc:
.double 4.0, 4.0
.text
.global is_prime_asm
.p2align 4
is_prime_asm:
#ifdef __SSE3__
cvtsi2sd %edi, %xmm1
movddup %xmm1, %xmm0
#else
mov %edi, -4(%rsp)
mov %edi, -8(%rsp)
cvtdq2pd -8(%rsp), %xmm0
#endif
sqrtsd %xmm0, %xmm1
movapd .LC_div(%rip), %xmm2
xor %eax, %eax
jmp .L2
.L1:
movapd %xmm0, %xmm3
divpd %xmm2, %xmm3
#ifdef __SSE4_1__
roundpd $0xf, %xmm3, %xmm5
#else
cvtpd2dq %xmm3, %xmm4
cvtdq2pd %xmm4, %xmm5
#endif
cmpeqpd %xmm3, %xmm5
movmskpd %xmm5, %edx
test %edx, %edx
jnz .L_exit
addpd .LC_inc(%rip), %xmm2
.L2:
comisd %xmm1, %xmm2
jc .L1
jz .L1
inc %eax
.L_exit:
ret $0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment