donnaken15/st.asm

## st.asm

; jan 2023

format PE console 3.1

entry main

include 'win32a.inc'

section '' code data readable writeable executable
    main:
        ; Program entry point: a scratch harness that exercises the x87
        ; helpers in this file (lerp, lerpa, sinapprox). Nothing is printed;
        ; the intermediate values are meant to be inspected in a debugger.
        ; ebx and edi are callee-saved under the Win32 convention, so they
        ; are preserved around their use as temporaries here.
        push  ebx
        push  edi

        ; my first x87 code!
        ; smiling at the beginning
        ; of a path to insanity
        push  9.0f
        fld   dword [esp]
        fsqrt
        fstp  dword [esp]
        pop   eax              ; eax = float bits of sqrt(9.0)

        ; lerp(A = 0.5, X = ltable[2], Y = ltable[3])
        mov   eax , ltable
        push  dword[eax+(3*4)] ; Y
        push  dword[eax+(2*4)] ; X
        push  0.5f             ; A
        call  lerp
        mov   ebx , eax        ; keep the result across the next call

        ; lerpa(x = 2.5, f = ltable, fc = 7)
        push  7      ; fc
        push  ltable ; f
        push  2.5f   ; x
        call  lerpa

        ; load both results for comparison: st1 = lerp, st0 = lerpa
        push  ebx
        fld   dword[esp]
        pop   ebx
        push  eax
        fld   dword[esp]
        pop   eax
        ; pop (rather than ffree) so the register stack is really empty again
        fstp  st0
        fstp  st0

        mov   edi , 7.35f      ; test angle in radians

        push  edi
        call  sinapprox        ; eax = table-based approximation

        ; reference value from the hardware fsin
        push  edi
        fld   dword[esp]
        fsin
        fst   dword[esp]       ; store, but keep st0 loaded for the comparison
        pop   edi              ; edi = float bits of the fsin result

        push  eax
        fld   dword[esp]       ; st0 = approximation, st1 = reference
        pop   eax
        fstp  st0
        fstp  st0

        ;int3
        pop   edi
        pop   ebx
        xor   eax , eax        ; exit code 0 instead of leftover float bits
        ret

        align 10h
    lerp:
        ; float lerp(float A, float X, float Y) -- linear interpolation
        ; In:    [esp+4] = A (weight), [esp+8] = X, [esp+12] = Y  (float32)
        ; Out:   eax = raw float32 bits of X + (Y - X) * A
        ;        the callee removes the 12 bytes of arguments (ret 12)
        ; Clobb: eax; one x87 register is used and popped again
        ; The return address stays on the stack and the routine leaves with
        ; ret, so call/ret stay paired. lerpa tail-jumps here with the same
        ; stack layout.
        fld   dword[esp+12]    ; st0 = Y
        fsub  dword[esp+8]     ; st0 = Y - X
        fmul  dword[esp+4]     ; st0 = (Y - X) * A
        fadd  dword[esp+8]     ; st0 = X + (Y - X) * A
        fstp  dword[esp+4]     ; round to float32 in the (now dead) A slot
        mov   eax ,[esp+4]
        ret   12
    lerpa:
        ; float lerpa(float x, float *f, int fc)
        ; Samples the float32 table f (fc entries) at the fractional index x,
        ; interpolating linearly between f[i] and f[i+1] where i = trunc(x).
        ; In:    [esp+4] = x, [esp+8] = f, [esp+12] = fc (expects fc >= 1)
        ; Out:   eax = raw float32 bits of the result; the callee removes the
        ;        12 bytes of arguments
        ; Clobb: eax, ecx, edx, flags; the x87 register stack is left balanced
        ; Clamping: x < 0 returns f[0]; trunc(x) >= fc-1 returns f[fc-1].
        ; An x that does not fit in an int32 (or NaN) makes fistp store
        ; 80000000h, which the unsigned compare below also sends to the
        ; upper clamp.
        mov   edx ,[esp+8]          ; edx = f
        mov   eax ,[esp+4]          ; raw bits of x
        test  eax , eax
        js    lerpa_first           ; sign bit set: below the table

        fld1
        fld   dword[esp+4]          ; st0 = x, st1 = 1.0
        fprem                       ; st0 = x mod 1 = fractional part
        fstp  st1                   ; drop the 1.0, st0 = frac
        fld   dword[esp+4]
        fsub  st0 , st1             ; st0 = x - frac = trunc(x), st1 = frac
        fistp dword[esp+4]          ; x slot is dead now; reuse it for i
        mov   eax ,[esp+4]          ; eax = i
        mov   ecx ,[esp+12]
        dec   ecx                   ; ecx = fc - 1 = last valid index
        cmp   eax , ecx
        jae   lerpa_last            ; no f[i+1] to interpolate towards

        ; Rewrite the argument slots in place into lerp's (A, X, Y) layout
        ; and tail-jump; lerp returns straight to our caller.
        mov   ecx ,[edx+eax*4+4]
        mov   [esp+12], ecx         ; Y = f[i+1]
        mov   ecx ,[edx+eax*4]
        mov   [esp+8], ecx          ; X = f[i]
        fstp  dword[esp+4]          ; A = frac
        jmp   lerp

      lerpa_last:
        fstp  st0                   ; discard frac
        mov   eax ,[edx+ecx*4]      ; f[fc-1]
        ret   12
      lerpa_first:
        mov   eax ,[edx]            ; f[0]
        ret   12
    sin:
        ; float sin(float x) -- sine via the x87 fsin instruction
        ; In:    [esp+4] = x (float32, radians)
        ; Out:   eax = raw float32 bits of sin(x); the callee removes the
        ;        4-byte argument (ret 4)
        ; Clobb: eax; one x87 register is used and popped again
        ; Note:  fsin leaves its operand unchanged (and sets C2) when
        ;        |x| >= 2^63; that case is not handled here.
        fld   dword[esp+4]
        fsin
        fstp  dword[esp+4]     ; pop the result into the argument slot
        mov   eax ,[esp+4]
        ret   4
    ;public sinapprox
    sinapprox:
        ; float sinapprox(float x) -- table-driven sine approximation
        ; In:    [esp+4] = x (float32, radians)
        ; Out:   eax = raw float32 bits of the result; the callee removes the
        ;        4-byte argument on every path (ret 4)
        ; Clobb: eax, ecx, edx, flags; the x87 register stack is left balanced
        ; Uses:  pix2, pihalf, sinprec, sintable
        ;
        ; |x| is reduced modulo 2*pi and scaled so that one quadrant spans
        ; N = [sinprec] steps. The index is folded back into [0, N] with the
        ; symmetries of sine, and the sign (sin(-x) = -sin(x)) is applied at
        ; the end. sintable is only read at indices 0..N-1; index N is
        ; answered with 1.0 directly.
        ; Assumes sintable[i] holds non-negative float32 sin(i*pi/(2N)) --
        ; the table file is not part of this source, TODO confirm.
        ; +-0 returns +-0; Inf and NaN return a NaN.
        mov   eax ,[esp+4]          ; raw bits of x
        mov   edx , eax
        and   edx , 80000000h       ; edx = sign of x, applied to the result
        and   eax , 7FFFFFFFh       ; eax = bits of |x|
        jz    sina_done             ; x == +-0: result is +-0
        cmp   eax , 7F800000h
        jae   sina_nan              ; Inf or NaN: no meaningful index

        fld   dword[pix2]           ; 2*pi
        fld   dword[esp+4]
        fabs                        ; st0 = |x|, st1 = 2*pi
      sina_reduce:
        fprem                       ; partial remainder of |x| / (2*pi)
        fnstsw ax
        test  ah , 4                ; C2 set: reduction not finished yet
        jnz   sina_reduce
        fstp  st1                   ; drop 2*pi, st0 = |x| mod 2*pi
        ; ((x/(pi/2))*sinprec)
        fdiv  dword[pihalf]
        fimul dword[sinprec]
        fistp dword[esp+4]          ; rounded table index, 0..4N
        mov   eax ,[esp+4]

        mov   ecx ,[sinprec]
        add   ecx , ecx             ; ecx = 2N = half a period
        cmp   eax , ecx
        jb    sina_half
        sub   eax , ecx             ; second half of the period:
        xor   edx , 80000000h       ; sin(t + pi) = -sin(t)
      sina_half:
        cmp   eax , ecx
        ja    sina_nan              ; index outside 0..4N; should not happen
        shr   ecx , 1               ; ecx = N
        cmp   eax , ecx
        jb    sina_lookup           ; first quadrant: direct lookup
        je    sina_one              ; exactly pi/2
        neg   eax
        lea   eax ,[eax+ecx*2]      ; sin(pi - t) = sin(t): index = 2N - index
      sina_lookup:
        mov   eax ,[sintable+eax*4]
      sina_done:
        xor   eax , edx             ; apply the accumulated sign
        ret   4
      sina_one:
        mov   eax , 3F800000h       ; 1.0
        jmp   sina_done
      sina_nan:
        mov   eax , 7FF80000h       ; NaN
        ret   4

        align 10h

    ; Sample table passed to lerp/lerpa by main: 7 float32 values
    ; (sin(0)..sin(6), radians, rounded to 4 decimals).
    ltable dd 0.0,0.8415,0.9093,0.1411,-0.7568,-0.9589,-0.2794
    ;pi   dd 3.14159265359f ; how do I use the preprocessor "= value" thing here?
    ; probably only works for integers :(
    ; 2*pi as float32: the period sinapprox reduces its argument by
    pix2 dd 6.28318530718f
    ; pi/2 as float32: one quadrant, the divisor sinapprox uses to scale x
    pihalf dd 1.57079632679f
    ; number of table steps per quadrant (index scale in sinapprox)
    sinprec dd 8192
    ; Lookup table read as dwords by sinapprox. The contents come from an
    ; external file that is not shown here; presumably sinprec float32
    ; samples of sin over [0, pi/2) -- TODO confirm.
    sintable file 'sintable.bin' ; dd dup [sinprec]

	; jan 2023

	format PE console 3.1

	entry main

	include 'win32a.inc'

	section '' code data readable writeable executable
	main:
		; Program entry point: a scratch harness that exercises the x87
		; helpers in this file (lerp, lerpa, sinapprox). Nothing is printed;
		; the intermediate values are meant to be inspected in a debugger.
		; ebx and edi are callee-saved under the Win32 convention, so they
		; are preserved around their use as temporaries here.
		push  ebx
		push  edi

		; my first x87 code!
		; smiling at the beginning
		; of a path to insanity
		push  9.0f
		fld   dword [esp]
		fsqrt
		fstp  dword [esp]
		pop   eax              ; eax = float bits of sqrt(9.0)

		; lerp(A = 0.5, X = ltable[2], Y = ltable[3])
		mov   eax , ltable
		push  dword[eax+(3*4)] ; Y
		push  dword[eax+(2*4)] ; X
		push  0.5f             ; A
		call  lerp
		mov   ebx , eax        ; keep the result across the next call

		; lerpa(x = 2.5, f = ltable, fc = 7)
		push  7      ; fc
		push  ltable ; f
		push  2.5f   ; x
		call  lerpa

		; load both results for comparison: st1 = lerp, st0 = lerpa
		push  ebx
		fld   dword[esp]
		pop   ebx
		push  eax
		fld   dword[esp]
		pop   eax
		; pop (rather than ffree) so the register stack is really empty again
		fstp  st0
		fstp  st0

		mov   edi , 7.35f      ; test angle in radians

		push  edi
		call  sinapprox        ; eax = table-based approximation

		; reference value from the hardware fsin
		push  edi
		fld   dword[esp]
		fsin
		fst   dword[esp]       ; store, but keep st0 loaded for the comparison
		pop   edi              ; edi = float bits of the fsin result

		push  eax
		fld   dword[esp]       ; st0 = approximation, st1 = reference
		pop   eax
		fstp  st0
		fstp  st0

		;int3
		pop   edi
		pop   ebx
		xor   eax , eax        ; exit code 0 instead of leftover float bits
		ret

	align 10h
	lerp:
		; float lerp(float A, float X, float Y) -- linear interpolation
		; In:    [esp+4] = A (weight), [esp+8] = X, [esp+12] = Y  (float32)
		; Out:   eax = raw float32 bits of X + (Y - X) * A
		;        the callee removes the 12 bytes of arguments (ret 12)
		; Clobb: eax; one x87 register is used and popped again
		; The return address stays on the stack and the routine leaves with
		; ret, so call/ret stay paired. lerpa tail-jumps here with the same
		; stack layout.
		fld   dword[esp+12]    ; st0 = Y
		fsub  dword[esp+8]     ; st0 = Y - X
		fmul  dword[esp+4]     ; st0 = (Y - X) * A
		fadd  dword[esp+8]     ; st0 = X + (Y - X) * A
		fstp  dword[esp+4]     ; round to float32 in the (now dead) A slot
		mov   eax ,[esp+4]
		ret   12
	lerpa:
		; float lerpa(float x, float *f, int fc)
		; Samples the float32 table f (fc entries) at the fractional index x,
		; interpolating linearly between f[i] and f[i+1] where i = trunc(x).
		; In:    [esp+4] = x, [esp+8] = f, [esp+12] = fc (expects fc >= 1)
		; Out:   eax = raw float32 bits of the result; the callee removes the
		;        12 bytes of arguments
		; Clobb: eax, ecx, edx, flags; the x87 register stack is left balanced
		; Clamping: x < 0 returns f[0]; trunc(x) >= fc-1 returns f[fc-1].
		; An x that does not fit in an int32 (or NaN) makes fistp store
		; 80000000h, which the unsigned compare below also sends to the
		; upper clamp.
		mov   edx ,[esp+8]          ; edx = f
		mov   eax ,[esp+4]          ; raw bits of x
		test  eax , eax
		js    lerpa_first           ; sign bit set: below the table

		fld1
		fld   dword[esp+4]          ; st0 = x, st1 = 1.0
		fprem                       ; st0 = x mod 1 = fractional part
		fstp  st1                   ; drop the 1.0, st0 = frac
		fld   dword[esp+4]
		fsub  st0 , st1             ; st0 = x - frac = trunc(x), st1 = frac
		fistp dword[esp+4]          ; x slot is dead now; reuse it for i
		mov   eax ,[esp+4]          ; eax = i
		mov   ecx ,[esp+12]
		dec   ecx                   ; ecx = fc - 1 = last valid index
		cmp   eax , ecx
		jae   lerpa_last            ; no f[i+1] to interpolate towards

		; Rewrite the argument slots in place into lerp's (A, X, Y) layout
		; and tail-jump; lerp returns straight to our caller.
		mov   ecx ,[edx+eax*4+4]
		mov   [esp+12], ecx         ; Y = f[i+1]
		mov   ecx ,[edx+eax*4]
		mov   [esp+8], ecx          ; X = f[i]
		fstp  dword[esp+4]          ; A = frac
		jmp   lerp

	lerpa_last:
		fstp  st0                   ; discard frac
		mov   eax ,[edx+ecx*4]      ; f[fc-1]
		ret   12
	lerpa_first:
		mov   eax ,[edx]            ; f[0]
		ret   12
	sin:
		; float sin(float x) -- sine via the x87 fsin instruction
		; In:    [esp+4] = x (float32, radians)
		; Out:   eax = raw float32 bits of sin(x); the callee removes the
		;        4-byte argument (ret 4)
		; Clobb: eax; one x87 register is used and popped again
		; Note:  fsin leaves its operand unchanged (and sets C2) when
		;        |x| >= 2^63; that case is not handled here.
		fld   dword[esp+4]
		fsin
		fstp  dword[esp+4]     ; pop the result into the argument slot
		mov   eax ,[esp+4]
		ret   4
	;public sinapprox
	sinapprox:
		; float sinapprox(float x) -- table-driven sine approximation
		; In:    [esp+4] = x (float32, radians)
		; Out:   eax = raw float32 bits of the result; the callee removes the
		;        4-byte argument on every path (ret 4)
		; Clobb: eax, ecx, edx, flags; the x87 register stack is left balanced
		; Uses:  pix2, pihalf, sinprec, sintable
		;
		; |x| is reduced modulo 2*pi and scaled so that one quadrant spans
		; N = [sinprec] steps. The index is folded back into [0, N] with the
		; symmetries of sine, and the sign (sin(-x) = -sin(x)) is applied at
		; the end. sintable is only read at indices 0..N-1; index N is
		; answered with 1.0 directly.
		; Assumes sintable[i] holds non-negative float32 sin(i*pi/(2N)) --
		; the table file is not part of this source, TODO confirm.
		; +-0 returns +-0; Inf and NaN return a NaN.
		mov   eax ,[esp+4]          ; raw bits of x
		mov   edx , eax
		and   edx , 80000000h       ; edx = sign of x, applied to the result
		and   eax , 7FFFFFFFh       ; eax = bits of |x|
		jz    sina_done             ; x == +-0: result is +-0
		cmp   eax , 7F800000h
		jae   sina_nan              ; Inf or NaN: no meaningful index

		fld   dword[pix2]           ; 2*pi
		fld   dword[esp+4]
		fabs                        ; st0 = |x|, st1 = 2*pi
	sina_reduce:
		fprem                       ; partial remainder of |x| / (2*pi)
		fnstsw ax
		test  ah , 4                ; C2 set: reduction not finished yet
		jnz   sina_reduce
		fstp  st1                   ; drop 2*pi, st0 = |x| mod 2*pi
		; ((x/(pi/2))*sinprec)
		fdiv  dword[pihalf]
		fimul dword[sinprec]
		fistp dword[esp+4]          ; rounded table index, 0..4N
		mov   eax ,[esp+4]

		mov   ecx ,[sinprec]
		add   ecx , ecx             ; ecx = 2N = half a period
		cmp   eax , ecx
		jb    sina_half
		sub   eax , ecx             ; second half of the period:
		xor   edx , 80000000h       ; sin(t + pi) = -sin(t)
	sina_half:
		cmp   eax , ecx
		ja    sina_nan              ; index outside 0..4N; should not happen
		shr   ecx , 1               ; ecx = N
		cmp   eax , ecx
		jb    sina_lookup           ; first quadrant: direct lookup
		je    sina_one              ; exactly pi/2
		neg   eax
		lea   eax ,[eax+ecx*2]      ; sin(pi - t) = sin(t): index = 2N - index
	sina_lookup:
		mov   eax ,[sintable+eax*4]
	sina_done:
		xor   eax , edx             ; apply the accumulated sign
		ret   4
	sina_one:
		mov   eax , 3F800000h       ; 1.0
		jmp   sina_done
	sina_nan:
		mov   eax , 7FF80000h       ; NaN
		ret   4

	align 10h

	; Sample table passed to lerp/lerpa by main: 7 float32 values
	; (sin(0)..sin(6), radians, rounded to 4 decimals).
	ltable dd 0.0,0.8415,0.9093,0.1411,-0.7568,-0.9589,-0.2794
	;pi dd 3.14159265359f ; how do I use the preprocessor "= value" thing here?
	; probably only works for integers :(
	; 2*pi as float32: the period sinapprox reduces its argument by
	pix2 dd 6.28318530718f
	; pi/2 as float32: one quadrant, the divisor sinapprox uses to scale x
	pihalf dd 1.57079632679f
	; number of table steps per quadrant (index scale in sinapprox)
	sinprec dd 8192
	; Lookup table read as dwords by sinapprox. The contents come from an
	; external file that is not shown here; presumably sinprec float32
	; samples of sin over [0, pi/2) -- TODO confirm.
	sintable file 'sintable.bin' ; dd dup [sinprec]