ped7g/fixed_point_math_example_x86_64_linux.asm

## fixed_point_math_example_x86_64_linux.asm
; (C) [copyleft] 2018 Peter Helcmanovsky
; License: CC0 https://creativecommons.org/share-your-work/public-domain/cc0
;
; x86_64 linux asm example of fixed-point arithmetic
; (see https://en.wikipedia.org/wiki/Fixed-point_arithmetic)
;
; to build I'm using nasm and ld:
; nasm -f elf64 %f -l %n.lst -w+all; ld -b elf64-x86-64 -o %n %n.o
; (where %f is source file name and %n is just the main part w/o extension)
;
; equation "(a*c-b/a)*(c+d)" done in fixed 48:16 point math fashion
; all values are kept as 64 bit integers, where top 48 bits represent the "whole"
; part and low 16 bits represent the fraction.
; I.e. value 0x54000 represents value 5.25:
; upper 48 bits = 5 (0x50000), low 16bits (0x4000) = 16384 -> 16384/65536 = 0.25
; The 48:16 fixed point encoding allows for fractional parts with [fixed]
; granularity 1/65536 = 0.0000152587890625, and the 48 bits whole part has
; range -140737488355328 to +140737488355327.
;
; The code is intentionally calculating equation as is, NOT rearranging it
; to "(c+d)*a*c - (c+d)*b/a", which would cover the example with 20, 10, 3, 5
; even with integer-only calculation.
;
; The code is also intentionally using only 80386 instructions, not exploiting
; any modern SSE/AVX options, as I'm not very good with those, plus this is
; supposed to demonstrate the classic way how the original x86 instruction set
; allows easily for more than simple integers math since early x86 CPUs.

                bits    64

fraction_bits   equ     16

%if 1
; unfortunately, NASM does not support compile time float->int conversions (due
; to portability reasons), so you have to manually calculate fraction part.
; [a, b, c, d] = [20, 10, 3, 5] example
val_a           equ     20
val_a_fraction  equ     0       ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
val_b           equ     10
val_b_fraction  equ     0       ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
val_c           equ     3
val_c_fraction  equ     0       ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
val_d           equ     5
val_d_fraction  equ     0       ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
; for values 20, 10, 3, 5 the result 476 is expected
val_expected    equ     476
val_expected_f  equ     0
%endif

%if 0
; [a, b, c, d] = [~8388608.07111, ~333.3333, ~4095.9999, -8191.0] example
; (8388608.07111*4095.9999-333.3333/8388608.07111)*(4095.9999-8191.0)
; = cca. -140703129810535.32829
val_a           equ     8388608     ; 2**23
val_a_fraction  equ     0x1234      ; 0x1234 = 0.07110595703125
val_b           equ     333
val_b_fraction  equ     21845       ; 21845 = 0.3333282470703125
val_c           equ     4095        ; 2**12-1
val_c_fraction  equ     0xFFFF      ; 0xFFFF = 0.9999847412109375
val_d           equ     -8191       ; -2**13+1
val_d_fraction  equ     0
; for these values the result cca. -140703129810535.32829 is expected
; Real result from code is         -140703129809756.5650482177734375
val_expected    equ     -140703129809757
val_expected_f  equ     0x6F59      ; 0x6F59 with negative whole part = ~0.56505
%endif

    global _start
    _start:
        ; load all values into register in the fixed-point 48:16 way
        mov     rsi, (val_a<<fraction_bits) + val_a_fraction
        mov     rbx, (val_b<<fraction_bits) + val_b_fraction
        mov     rcx, (val_c<<fraction_bits) + val_c_fraction
        mov     rdi, (val_d<<fraction_bits) + val_d_fraction
        mov     rbp, (val_expected<<fraction_bits) + val_expected_f
        ; rdi = d+c
        add     rdi, rcx
        ; rbx = b/a
        ; premultiply the dividend by fraction bits into rdx:rax (48:32 = 80b)
        mov     rax, rbx
        cqo                             ; rdx:rax = b
        shld    rdx, rax, fraction_bits
        shl     rax, fraction_bits      ; rdx:rax = b << fraction_bits
        ; now after idiv the result is correctly shifted in 48:16 way
        idiv    rsi                     ; rax = b/a
        mov     rbx, rax
        ; rax = a*c
        mov     rax, rsi
        imul    rcx                     ; rdx:rax = (a*c)<<fraction_bits (48:32)
        shrd    rax, rdx, fraction_bits ; normalize the value back to 48:16
        ; rax = a*c - b/a
        sub     rax, rbx
        ; rax = (a*c - b/a) * (c+d)
        imul    rdi                     ; rdx:rax = result<<fraction_bits
        shrd    rax, rdx, fraction_bits ; normalize the value back
        ; rax = 48:16 result (use `sar rax, fraction_bits` to cast to int)
        ; edi = false/true when expected result was calculated
        xor     edi, edi
        cmp     rax, rbp
        sete    dil
        ; exit back to linux with return 0/1 result
        mov     eax, 60
        syscall
	; (C) [copyleft] 2018 Peter Helcmanovsky
	; License: CC0 https://creativecommons.org/share-your-work/public-domain/cc0
	;
	; x86_64 linux asm example of fixed-point arithmetic
	; (see https://en.wikipedia.org/wiki/Fixed-point_arithmetic)
	;
	; to build I'm using nasm and ld:
	; nasm -f elf64 %f -l %n.lst -w+all; ld -b elf64-x86-64 -o %n %n.o
	; (where %f is source file name and %n is just the main part w/o extension)
	;
	; equation "(ac-b/a)(c+d)" done in fixed 48:16 point math fashion
	; all values are kept as 64 bit integers, where top 48 bits represent the "whole"
	; part and low 16 bits represent the fraction.
	; I.e. value 0x54000 represents value 5.25:
	; upper 48 bits = 5 (0x50000), low 16bits (0x4000) = 16384 -> 16384/65536 = 0.25
	; The 48:16 fixed point encoding allows for fractional parts with [fixed]
	; granularity 1/65536 = 0.0000152587890625, and the 48 bits whole part has
	; range -140737488355328 to +140737488355327.
	;
	; The code is intentionally calculating equation as is, NOT rearranging it
	; to "(c+d)ac - (c+d)*b/a", which would cover the example with 20, 10, 3, 5
	; even with integer-only calculation.
	;
	; The code is also intentionally using only 80386 instructions, not exploiting
	; any modern SSE/AVX options, as I'm not very good with those, plus this is
	; supposed to demonstrate the classic way how the original x86 instruction set
	; allows easily for more than simple integers math since early x86 CPUs.

	bits 64

	fraction_bits equ 16

	%if 1
	; unfortunately, NASM does not support compile time float->int conversions (due
	; to portability reasons), so you have to manually calculate fraction part.
	; [a, b, c, d] = [20, 10, 3, 5] example
	val_a equ 20
	val_a_fraction equ 0 ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
	val_b equ 10
	val_b_fraction equ 0 ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
	val_c equ 3
	val_c_fraction equ 0 ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
	val_d equ 5
	val_d_fraction equ 0 ; fraction is value 0x0000-0xFFFF (0x8000 = 0.5)
	; for values 20, 10, 3, 5 the result 476 is expected
	val_expected equ 476
	val_expected_f equ 0
	%endif

	%if 0
	; [a, b, c, d] = [~8388608.07111, ~333.3333, ~4095.9999, -8191.0] example
	; (8388608.071114095.9999-333.3333/8388608.07111)(4095.9999-8191.0)
	; = cca. -140703129810535.32829
	val_a equ 8388608 ; 2**23
	val_a_fraction equ 0x1234 ; 0x1234 = 0.07110595703125
	val_b equ 333
	val_b_fraction equ 21845 ; 21845 = 0.3333282470703125
	val_c equ 4095 ; 2**12-1
	val_c_fraction equ 0xFFFF ; 0xFFFF = 0.9999847412109375
	val_d equ -8191 ; -2**13+1
	val_d_fraction equ 0
	; for these values the result cca. -140703129810535.32829 is expected
	; Real result from code is -140703129809756.5650482177734375
	val_expected equ -140703129809757
	val_expected_f equ 0x6F59 ; 0x6F59 with negative whole part = ~0.56505
	%endif

	global _start
	_start:
	; load all values into register in the fixed-point 48:16 way
	mov rsi, (val_a<<fraction_bits) + val_a_fraction
	mov rbx, (val_b<<fraction_bits) + val_b_fraction
	mov rcx, (val_c<<fraction_bits) + val_c_fraction
	mov rdi, (val_d<<fraction_bits) + val_d_fraction
	mov rbp, (val_expected<<fraction_bits) + val_expected_f
	; rdi = d+c
	add rdi, rcx
	; rbx = b/a
	; premultiply the dividend by fraction bits into rdx:rax (48:32 = 80b)
	mov rax, rbx
	cqo ; rdx:rax = b
	shld rdx, rax, fraction_bits
	shl rax, fraction_bits ; rdx:rax = b << fraction_bits
	; now after idiv the result is correctly shifted in 48:16 way
	idiv rsi ; rax = b/a
	mov rbx, rax
	; rax = a*c
	mov rax, rsi
	imul rcx ; rdx:rax = (a*c)<<fraction_bits (48:32)
	shrd rax, rdx, fraction_bits ; normalize the value back to 48:16
	; rax = a*c - b/a
	sub rax, rbx
	; rax = (ac - b/a) (c+d)
	imul rdi ; rdx:rax = result<<fraction_bits
	shrd rax, rdx, fraction_bits ; normalize the value back
	; rax = 48:16 result (use `sar rax, fraction_bits` to cast to int)
	; edi = false/true when expected result was calculated
	xor edi, edi
	cmp rax, rbp
	sete dil
	; exit back to linux with return 0/1 result
	mov eax, 60
	syscall