Created
December 26, 2010 23:05
-
-
Save khayrov/755710 to your computer and use it in GitHub Desktop.
Micro-optimized (but algorithmically inefficient) prime numbers finder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
make clean | |
make | |
cat /proc/cpuinfo > report.txt | |
echo >> report.txt | |
uname -a >> report.txt | |
echo >> report.txt | |
g++ -E -march=native -Wp,-dM prime.cpp | grep SSE >> report.txt | |
echo >> report.txt | |
for program in prime_int prime_fp_sse prime_fp_x87 prime_asm ; do | |
if [ -x ./$program ]; then | |
echo "Performing sanity check for $program" | |
checksum=`./$program 5000000 | md5sum | sed -e 's/[^0-9a-f]//g'` | |
if [ $checksum != 1a0e63b08ad78d461fa05115d8c15abf ]; then | |
echo "Sanity check failed for $program" | |
else | |
echo "Benchmarking $program" | |
echo $program >> report.txt | |
for i in `seq 1 5`; do | |
echo "Iteration $i of 5" | |
/usr/bin/time ./$program 5000000 > /dev/null 2>> report.txt | |
done | |
fi | |
fi | |
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CFLAGS=-O2 -ffast-math -march=native | |
CXXFLAGS=$(CFLAGS) | |
ifneq (, $(findstring 64, $(shell uname -m))) | |
asm_file=prime64.S | |
x86_64=1 | |
else | |
asm_file=prime32.S | |
endif | |
files=prime.cpp $(asm_file) | |
targets=prime_int prime_fp_x87 prime_fp_sse prime_asm | |
all: $(targets) | |
prime_int: $(files) | |
$(CXX) $(CXXFLAGS) $^ -o $@ | |
ifdef x86_64 | |
prime_fp_sse: $(files) | |
$(CXX) $(CXXFLAGS) -DUSE_FP $^ -o $@ | |
prime_fp_x87: | |
else | |
prime_fp_sse: $(files) | |
$(CXX) $(CXXFLAGS) -mfpmath=sse -DUSE_FP $^ -o $@ | |
prime_fp_x87: $(files) | |
$(CXX) $(CXXFLAGS) -DUSE_FP $^ -o $@ | |
endif | |
prime_asm: $(files) | |
$(CXX) $(CXXFLAGS) -DUSE_ASM $^ -o $@ | |
.PHONY: clean | |
clean: | |
rm -f $(targets) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cstdlib> | |
#include <cmath> | |
extern "C" bool is_prime_asm(int n); | |
static bool is_prime_int(int n) | |
{ | |
int limit = sqrt(n); | |
for (int i = 3; i <= limit; i += 2) | |
if (n % i == 0) | |
return false; | |
return true; | |
} | |
static bool is_prime_fp(int n) | |
{ | |
double limit = sqrt(n); | |
double nf = n; | |
for (double i = 3.0; i <= limit; i += 2.0) | |
{ | |
double d = nf / i; | |
if (d == trunc(d)) | |
return false; | |
} | |
return true; | |
} | |
#ifdef USE_ASM | |
#define is_prime is_prime_asm | |
#elif defined(USE_FP) | |
#define is_prime is_prime_fp | |
#else | |
#define is_prime is_prime_int | |
#endif | |
int main(int argc, char **argv) | |
{ | |
if (argc < 2) | |
{ | |
std::cerr << "Usage: " << argv[0] << " N\n"; | |
return 1; | |
} | |
int maxn = atoi(argv[1]); | |
if (maxn < 2) | |
{ | |
return 0; | |
} | |
std::cout << 2 << '\n'; | |
for (int n = 3; n <= maxn; n += 2) | |
if (is_prime(n)) | |
std::cout << n << '\n'; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.section .rodata | |
.p2align 4 | |
.LC_div: | |
.double 3.0, 5.0 | |
.LC_inc: | |
.double 4.0, 4.0 | |
.text | |
.global is_prime_asm | |
.p2align 4 | |
is_prime_asm: | |
mov 4(%esp), %eax | |
push %eax | |
push %eax | |
cvtdq2pd (%esp), %xmm0 | |
sqrtsd %xmm0, %xmm1 | |
movapd .LC_div, %xmm2 | |
xor %eax, %eax | |
jmp .L2 | |
.L1: | |
movapd %xmm0, %xmm3 | |
divpd %xmm2, %xmm3 | |
#ifdef __SSE4_1__ | |
roundpd $0xf, %xmm3, %xmm5 | |
#else | |
cvtpd2dq %xmm3, %xmm4 | |
cvtdq2pd %xmm4, %xmm5 | |
#endif | |
cmpeqpd %xmm3, %xmm5 | |
movmskpd %xmm5, %edx | |
test %edx, %edx | |
jnz .L_exit | |
addpd .LC_inc, %xmm2 | |
.L2: | |
comisd %xmm1, %xmm2 | |
jc .L1 | |
jz .L1 | |
inc %eax | |
.L_exit: | |
add $8, %esp | |
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.section .rodata | |
.p2align 4 | |
.LC_div: | |
.double 3.0, 5.0 | |
.LC_inc: | |
.double 4.0, 4.0 | |
.text | |
.global is_prime_asm | |
.p2align 4 | |
is_prime_asm: | |
#ifdef __SSE3__ | |
cvtsi2sd %edi, %xmm1 | |
movddup %xmm1, %xmm0 | |
#else | |
mov %edi, -4(%rsp) | |
mov %edi, -8(%rsp) | |
cvtdq2pd -8(%rsp), %xmm0 | |
#endif | |
sqrtsd %xmm0, %xmm1 | |
movapd .LC_div(%rip), %xmm2 | |
xor %eax, %eax | |
jmp .L2 | |
.L1: | |
movapd %xmm0, %xmm3 | |
divpd %xmm2, %xmm3 | |
#ifdef __SSE4_1__ | |
roundpd $0xf, %xmm3, %xmm5 | |
#else | |
cvtpd2dq %xmm3, %xmm4 | |
cvtdq2pd %xmm4, %xmm5 | |
#endif | |
cmpeqpd %xmm3, %xmm5 | |
movmskpd %xmm5, %edx | |
test %edx, %edx | |
jnz .L_exit | |
addpd .LC_inc(%rip), %xmm2 | |
.L2: | |
comisd %xmm1, %xmm2 | |
jc .L1 | |
jz .L1 | |
inc %eax | |
.L_exit: | |
ret $0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment