Skip to content

Instantly share code, notes, and snippets.

@donnaken15
Last active June 13, 2024 22:53
Show Gist options
  • Save donnaken15/2e92fc2ffa3ff78e4c9e5720470a3c76 to your computer and use it in GitHub Desktop.
Save donnaken15/2e92fc2ffa3ff78e4c9e5720470a3c76 to your computer and use it in GitHub Desktop.
Further-optimized prime counter: the code is reordered to avoid register stalls (actually working :O), trial division stops at the square root of the current number, and the prime table can be allocated larger than the memory reserved in the executable allows.
format PE64 console 3.1
entry start
include 'win64a.inc'

;-----------------------------------------------------------------------
; Prime lister, version 1: trial division against every prime stored so far.
; Syntax: flat assembler (fasm).  ABI: Microsoft x64.
;
; The table p is seeded with 3 and only odd candidates (5, 7, 9, ...) are
; tested, so 2 and 3 themselves are never printed.
;
; rdi = number of primes stored in p (also the index of the next free slot)
; rbp = index of the next stored prime to divide by
; rsi = current odd candidate
; rcx = divisor (a stored prime)
; rdx = remainder of rsi / rcx; zero means the candidate is composite
;-----------------------------------------------------------------------

MAX_ITERATIONS = 100000000              ; qwords reserved after the seed entry
TABLE_SLOTS    = MAX_ITERATIONS + 1     ; seed entry + reserved qwords

section '' import code data \
        readable writeable executable

        library msvcrt,'MSVCRT.DLL'
        import  msvcrt,\
                printf,'printf'

        align   10h
start:
        ; rbp/rsi/rdi are callee-saved in the Microsoft x64 ABI, so keep the
        ; caller's copies.  Entry has rsp = 16n+8; three pushes bring it back
        ; to a 16-byte boundary, which every call site requires.
        push    rbp
        push    rsi
        push    rdi

        mov     edi, 1                  ; j = 1: p[0] = 3 is already stored
        mov     esi, 5                  ; X = 5
candidate:
        xor     ebp, ebp                ; i = 0
trial:
        mov     rcx, [p + rbp*8]        ; A = p[i++]
        xor     edx, edx                ; div uses rdx:rax, clear the high half
        mov     rax, rsi
        inc     rbp
        div     rcx                     ; rdx = X % A
        test    rdx, rdx
        jz      advance                 ; divisible -> composite
        cmp     rbp, rdi
        jb      trial                   ; more stored primes to try

        mov     [p + rdi*8], rsi        ; p[j] = X
        invoke  printf, fmt, rsi        ; '%u' prints the low 32 bits of rsi
        inc     rdi                     ; j++
        cmp     rdi, TABLE_SLOTS
        jae     done                    ; table full: stop instead of overrunning it
advance:
        add     rsi, 2                  ; next odd number
        jmp     candidate
done:
        xor     eax, eax                ; exit code 0
        pop     rdi
        pop     rsi
        pop     rbp
        ret

fmt db '%u',13,0                        ; CR only: each prime overwrites the previous one
        align   10h
p       dq 3                            ; seed prime
        rq MAX_ITERATIONS               ; uninitialised slots for discovered primes
; 8 tabs suck, github please store the indent setting for files or something
format PE64 console 3.1
entry start
include 'win64a.inc'

;-----------------------------------------------------------------------
; Prime lister, version 2: trial division bounded by the square root of
; the candidate.
; Syntax: flat assembler (fasm).  ABI: Microsoft x64.
;
; rdi = index at which to store the next prime; incremented when one is found
; rbp = index of the stored prime to test against rsi
; rsi = current odd candidate
; r8  = sqrt(rsi) rounded to the nearest integer (x87 default rounding),
;       used as the upper limit for divisors
; rcx = divisor, one of the stored primes
; rdx = remainder of rdx:rax / rcx; zero means rsi is composite
;
; The x87 stack is empty whenever printf is called: the Microsoft x64 ABI
; treats x87 state as volatile across calls, so nothing is kept there.
;-----------------------------------------------------------------------

MAX_ITERATIONS = 100000000              ; qwords reserved after the seed entry
TABLE_SLOTS    = MAX_ITERATIONS + 1     ; seed entry + reserved qwords

macro alignd boundary,value:? { db (boundary-1)-($+boundary-1) mod boundary dup value }

section '' import code data \
        readable writeable executable

        library msvcrt,'MSVCRT.DLL'
        import  msvcrt,\
                printf,'printf'

        alignd  20h
start:
        ; Preserve the callee-saved registers this program uses.  Entry has
        ; rsp = 16n+8; three pushes restore the 16-byte alignment required
        ; at every call site.
        push    rbp
        push    rsi
        push    rdi

        mov     edi, 1                  ; j = 1: p[0] = 3 is already stored
        mov     esi, 5                  ; X = 5
        align   10h
main:
        push    rsi                     ; scratch qword for the x87 round trip
        fild    qword [rsp]             ; st0 = X
        fsqrt                           ; st0 = sqrt(X)
        fistp   qword [rsp]             ; store rounded sqrt, x87 stack empty again
        pop     r8                      ; C = round(sqrt(X))
        xor     ebp, ebp                ; i = 0

        ; X is a perfect square exactly when C*C == X; such X is composite.
        mov     rax, r8
        imul    rax, r8
        cmp     rax, rsi
        je      next
        align   10h
trial:
        mov     rcx, [p + rbp*8]        ; A = p[i++]
        xor     edx, edx                ; div uses rdx:rax, clear the high half
        mov     rax, rsi
        inc     rbp
        div     rcx                     ; rdx = X % A
        test    rdx, rdx
        jz      next                    ; divisible -> composite
        cmp     rcx, r8
        jb      trial                   ; keep going while A < C

        mov     [p + rdi*8], rsi        ; p[j] = X
        invoke  printf, fmt, rsi        ; '%u' prints the low 32 bits of rsi
        inc     rdi                     ; j++
        cmp     rdi, TABLE_SLOTS
        jae     done                    ; table full: stop instead of overrunning it
next:
        add     rsi, 2                  ; next odd number
        jmp     main
done:
        xor     eax, eax                ; exit code 0
        pop     rdi
        pop     rsi
        pop     rbp
        ret

fmt db '%u',13,0                        ; CR only: each prime overwrites the previous one
        alignd  10h
p       dq 3                            ; seed prime
        rq MAX_ITERATIONS               ; uninitialised slots for discovered primes
printing = 0                            ; non-zero: console build that prints every prime
if printing <> 0
format PE64 console 3.1
else
format PE64 GUI 3.1
end if
entry start
include 'win64a.inc'

;-----------------------------------------------------------------------
; Prime lister, version 3: SSE square-root bound, heap-allocated table.
; Syntax: flat assembler (fasm).  ABI: Microsoft x64.  Needs SSE4.1 (roundsd).
;
; rbp = base address of the prime table (from _aligned_malloc)
; r12 = number of qword slots in the table
; rdi = index at which to store the next prime; incremented when one is found
; rbx = index of the stored prime to test against rsi
; rsi = current odd candidate
; r8  = floor(sqrt(rsi)), the upper limit for divisors
; rcx = divisor, one of the stored primes
; rdx = remainder of rdx:rax / rcx; zero means rsi is composite
;
; No XMM value is kept across a call: xmm0/xmm1 are recomputed for every
; candidate, so the optional printf cannot disturb the loop.
;-----------------------------------------------------------------------

MAX_ITERATIONS   = 990000000            ;2147483647
MAX_ITERATIONS_2 = 1                    ; 900000000
TABLE_SLOTS      = MAX_ITERATIONS + MAX_ITERATIONS_2

macro alignd b,v:? { db (b-1)-($+b-1) mod b dup v }

section '' import code data readable writeable executable

        library msvcrt,'MSVCRT.DLL',\
                kernel,'KERNEL32.DLL'
if printing <> 0
        import  msvcrt,\
                malloc,'_aligned_malloc',\
                free,'_aligned_free',\
                printf,'printf'
else
        import  msvcrt,\
                malloc,'_aligned_malloc',\
                free,'_aligned_free'
end if
        import  kernel,\
                Sleep,'Sleep'

        alignd  80h
start:
        ; Preserve the callee-saved registers this program uses.  Entry has
        ; rsp = 16n+8; five pushes restore the 16-byte alignment required
        ; at every call site.
        push    rbx
        push    rbp
        push    rsi
        push    rdi
        push    r12

        invoke  malloc, (TABLE_SLOTS * 8), 1000h
        test    rax, rax
        jz      alloc_failed            ; do not dereference a failed allocation
        mov     rbp, rax
        mov     qword [rbp], 3          ; p[0] = 3, the seed prime
        mov     r12, TABLE_SLOTS
        mov     edi, 1                  ; j = 1
        mov     esi, 5                  ; X = 5
        xor     ebx, ebx                ; i = 0
        align   10h
main:
        cvtsi2sd xmm0, rsi              ; A = X
        sqrtsd  xmm0, xmm0              ; A = sqrt(A)
        roundsd xmm1, xmm0, 1           ; xmm1 = floor(A)
        ucomisd xmm0, xmm1              ; sqrt already an integer?
        je      next                    ; perfect square -> composite
        cvtsd2si r8, xmm1               ; C = floor(sqrt(X))
        align   10h
trial:
        mov     rcx, [rbp + rbx*8]      ; A = p[i++]
        xor     edx, edx                ; div uses rdx:rax, clear the high half
        mov     rax, rsi
        inc     rbx
        div     rcx                     ; rdx = X % A
        test    rdx, rdx
        jz      next                    ; divisible -> composite
        cmp     rcx, r8
        jb      trial                   ; keep going while A < C

        mov     [rbp + rdi*8], rsi      ; p[j] = X
if printing <> 0
        invoke  printf, fmt, rsi
end if
        inc     rdi                     ; j++
next:
        xor     ebx, ebx                ; i = 0
        add     rsi, 2                  ; next odd number
        cmp     rdi, r12
        jb      main                    ; loop until every table slot is filled

        ; Imported functions are reached through their import-table slots,
        ; which is what invoke (call [name]) does.
        invoke  free, rbp
        invoke  Sleep, -1               ; Sleep(INFINITE)
        xor     eax, eax                ; exit code 0
finish:
        pop     r12
        pop     rdi
        pop     rsi
        pop     rbp
        pop     rbx
        ret
alloc_failed:
        mov     eax, 1                  ; exit code 1: table could not be allocated
        jmp     finish

fmt db '%llu',13,0                      ; CR only: each prime overwrites the previous one
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment