Skip to content

Instantly share code, notes, and snippets.

@webia1
Forked from jamesdiacono/dec64.nasm
Created April 14, 2023 07:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save webia1/ee456ed32ccb2c03164949b65b3c1b34 to your computer and use it in GitHub Desktop.
Save webia1/ee456ed32ccb2c03164949b65b3c1b34 to your computer and use it in GitHub Desktop.
The DEC64 number type for Linux, MacOS (Intel and Silicon) and Android. Include these files alongside https://github.com/douglascrockford/DEC64.
; title dec64.nasm for x64.
; dec64.com
; 2022-09-03
; Public Domain
; No warranty expressed or implied. Use at your own risk. You have been warned.
; This file implements the elementary arithmetic operations for DEC64, a decimal
; floating point type. DEC64 uses 64 bits to represent a number. The low order
; 8 bits contain an exponent that ranges from -127 to 127. The exponent is not
; biased. The exponent -128 is reserved for nan (not a number). The remaining
; 56 bits, including the sign bit, are the coefficient in the range
; -36028797018963968 thru 36028797018963967. The exponent and the coefficient
; are both two's complement signed numbers.
;
; The value of any non-nan DEC64 number is coefficient * (10 ** exponent).
;
; Rounding is to the nearest value. Ties are rounded away from zero. Integer
; division is floored. The result of modulo has the sign of the divisor.
; There is no negative zero.
;
; These operations will produce a result of nan:
;
; dec64_abs(nan)
; dec64_ceiling(nan)
; dec64_floor(nan)
; dec64_neg(nan)
; dec64_normal(nan)
; dec64_signum(nan)
;
; These operations will produce a result of zero for all values of n,
; even if n is nan:
;
; dec64_divide(0, n)
; dec64_integer_divide(0, n)
; dec64_modulo(0, n)
; dec64_multiply(0, n)
; dec64_multiply(n, 0)
;
; These operations will produce a result of nan for all values of n
; except zero:
;
; dec64_divide(n, 0)
; dec64_divide(n, nan)
; dec64_integer_divide(n, 0)
; dec64_integer_divide(n, nan)
; dec64_modulo(n, 0)
; dec64_modulo(n, nan)
; dec64_multiply(n, nan)
; dec64_multiply(nan, n)
;
; These operations will produce a result of nan for all values of n:
;
; dec64_add(n, nan)
; dec64_add(nan, n)
; dec64_divide(nan, n)
; dec64_integer_divide(nan, n)
; dec64_modulo(nan, n)
; dec64_round(nan, n)
; dec64_subtract(n, nan)
; dec64_subtract(nan, n)
;
; This file can be processed with Microsoft's ML64.exe. There might be other
; assemblers that can process this file, but that has not been tested.
;
; You know what goes great with those defective pentium chips?
; Defective pentium salsa! (David Letterman)
; All public symbols have a dec64_ prefix. All other symbols are private.
; There are 72057594037927936 possible nan values. Three are reserved:
;
; DEC64_NULL
; DEC64_TRUE
; DEC64_FALSE
;
; The other 72057594037927933 nan values can be used for any purpose,
; including object pointers.
; When these functions return nan, they will always return DEC64_NULL,
; the normal nan.
; The comparison functions will return DEC64_TRUE or DEC64_FALSE.
%define null 8000000000000080h
%define true 8000000000000380h
%define false 8000000000000280h
; Multiplication by a scaled reciprocal can be faster than division.
%define eight_over_ten -3689348814741910323
global dec64_abs;(number: dec64)
; returns absolution: dec64
global dec64_add;(augend: dec64, addend: dec64)
; returns sum: dec64
global dec64_ceiling;(number: dec64)
; returns integer: dec64
global dec64_coefficient;(number: dec64)
; returns coefficient: int64
global dec64_divide;(dividend: dec64, divisor: dec64)
; returns quotient: dec64
global dec64_exponent;(number: dec64)
; returns exponent: int64
global dec64_floor;(number: dec64)
; returns integer: dec64
global dec64_integer_divide;(dividend: dec64, divisor: dec64)
; returns quotient: dec64
global dec64_is_equal;(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
global dec64_is_false;(boolean: dec64)
; returns comparison: dec64
global dec64_is_integer;(number: dec64)
; returns comparison: dec64
global dec64_is_less;(comparahend: dec64, comparator: dec64)
; returns comparison: dec64
global dec64_is_nan;(number: dec64)
; returns comparison: dec64
global dec64_is_zero;(number: dec64)
; returns comparison: dec64
global dec64_modulo;(dividend: dec64, divisor: dec64)
; returns modulus: dec64
global dec64_multiply;(multiplicand: dec64, multiplier: dec64)
; returns product: dec64
global dec64_neg;(number: dec64)
; returns negation: dec64
global dec64_new;(coefficient: int64, exponent: int64)
; returns number: dec64
global dec64_normal;(number: dec64)
; returns normalization: dec64
global dec64_round;(number: dec64, place: dec64)
; returns quantization: dec64
global dec64_signum;(number: dec64)
; returns signature: dec64
global dec64_subtract;(minuend: dec64, subtrahend: dec64)
; returns difference: dec64
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
; Repair the register names. Over the long and twisted evolution of x86, the
; register names have picked up some weird, inconsistent conventions. We can
; simplify, naming them r0 thru r15, with a _b suffix to indicate the low
; order byte, an _h suffix to indicate the so-called high byte and a _w suffix
; to indicate the lower 16 bits. We would use _d to indicate the lower 32 bits,
; but that was not needed here.
%define r0 rax
%define r1 rcx
%define r2 rdx
%define r6 rsi
%define r7 rdi
%define r0_b al
%define r1_b cl
%define r2_b dl
%define r8_b r8b
%define r9_b r9b
%define r10_b r10b
%define r11_b r11b
%define r0_h ah
%define r1_h ch
%define r2_h dh
%define r0_w ax
%define r1_w cx
%define jr1z jrcxz
; All of the public functions in this file accept up to two arguments, which
; are passed in registers (either r1, r2 or r7, r6), returning a result in r0.
; Registers r1, r2, r8, r9, r10, and r11 are clobbered. Register r0 is the
; return value. The other registers are not disturbed.
; There is painfully inadequate standardization around x64 calling conventions.
; On Win64, the first two arguments are passed in r1 and r2. On Unix, the first
; two arguments are passed in r7 and r6. We try to hide this behind macros. The
; two systems also have different conventions about which registers may be
; clobbered and which must be preserved. This code lives in the intersection.
; This has not yet been tested on Unix.
%define UNIX 1 ; calling convention: 0 for Windows, 1 for Unix
%macro function_with_one_parameter 0
%if UNIX
mov r1, r7 ;; UNIX
%endif
%endmacro
%macro function_with_two_parameters 0
%if UNIX
mov r1, r7 ;; UNIX
mov r2, r6 ;; UNIX
%endif
%endmacro
%macro call_with_one_parameter 1
%if UNIX
mov r7, r1 ;; UNIX
%endif
call %1
%endmacro
%macro call_with_two_parameters 1
%if UNIX
mov r7, r1 ;; UNIX
mov r6, r2 ;; UNIX
%endif
call %1
%endmacro
%macro tail_with_one_parameter 1
%if UNIX
mov r7, r1 ;; UNIX
%endif
jmp %1
%endmacro
%macro tail_with_two_parameters 1
%if UNIX
mov r7, r1 ;; UNIX
mov r6, r2 ;; UNIX
%endif
jmp %1
%endmacro
; There may be a performance benefit in padding programs so that most jump
; destinations are aligned on 16 byte boundaries.
%macro pad 0
align 16
%endmacro
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_code section .text
power: ; the powers of 10
dq 1 ; 0
dq 10 ; 1
dq 100 ; 2
dq 1000 ; 3
dq 10000 ; 4
dq 100000 ; 5
dq 1000000 ; 6
dq 10000000 ; 7
dq 100000000 ; 8
dq 1000000000 ; 9
dq 10000000000 ; 10
dq 100000000000 ; 11
dq 1000000000000 ; 12
dq 10000000000000 ; 13
dq 100000000000000 ; 14
dq 1000000000000000 ; 15
dq 10000000000000000 ; 16
dq 100000000000000000 ; 17
dq 1000000000000000000 ; 18
dq 10000000000000000000 ; 19
; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_coefficient: function_with_one_parameter
;(number: dec64) returns coefficient: int64
; Return the coefficient part from a dec64 number.
mov r0, r1
sar r0, 8
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_exponent: function_with_one_parameter
;(number: dec64) returns exponent: int64
; Return the exponent part, sign extended to 64 bits, from a dec64 number.
; dec64_exponent(nan) returns -128.
movsx r0, r1_b ; r0 is the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_new: function_with_two_parameters
;(coefficient: int64, exponent: int64) returns number: dec64
; Construct a new dec64 number with a coefficient and an exponent.
mov r0, r1 ; r0 is the coefficient
test r0, r0 ; is the coefficient zero?
jz return_zero
mov r8, r2 ; r8 is the exponent
; Fall into pack.
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
pack:
; The pack function will combine the coefficient and exponent into a dec64.
; Numbers that are too huge to be contained in this format become nan.
; Numbers that are too tiny to be contained in this format become zero.
; The coefficient is in r0.
; The exponent is in r8.
; If the exponent is greater than 127, then the number is too big.
; But it might still be possible to salvage a value.
cmp r8, 127 ; compare exponent with 127
jg pack_decrease ; it might be possible to decrease it
; If the exponent is too small, or if the coefficient is too large, then some
; division is necessary. The absolute value of the coefficient is off by one
; for the negative because
; negative_extreme_coefficent = -(extreme_coefficent + 1)
mov r10, r0 ; r10 is the coefficient
mov r1, 3602879701896396800 ; the ultimate coefficient * 100
not r10 ; r10 is -coefficient
xor r11, r11 ; r11 is zero
test r10, r10 ; look at the sign bit
cmovs r10, r0 ; r10 is the absolute value of coefficient
cmp r10, r1 ; compare with the actual coefficient
jae pack_large ; deal with the very large coefficient
mov r1, 36028797018963967 ; the ultimate coefficient - 1
mov r9, -127 ; r9 is the ultimate exponent
cmp r1, r10 ; compare with the actual coefficient
adc r11, 0 ; add 1 to r11 if 1 digit too big
mov r1, 360287970189639679 ; the ultimate coefficient * 10 - 1
sub r9, r8 ; r9 is the difference from actual exponent
cmp r1, r10 ; compare with the actual coefficient
adc r11, 0 ; add 1 to r11 if 2 digits too big
cmp r9, r11 ; which excess is larger?
cmovl r9, r11 ; take the max
test r9, r9 ; if neither was zero
jnz pack_increase ; then increase the exponent by the excess
shl r0, 8 ; shift the exponent into position
cmovz r8, r0 ; if coefficient is zero, also zero the exp
movzx r8, r8_b ; zero out all but the bottom 8 bits of exp
or r0, r8 ; mix in the exponent
ret ; whew
pad
pack_large:
mov r1, r0 ; r1 is the coefficient
sar r1, 63 ; r1 is -1 if negative, or 0 if positive
mov r11, eight_over_ten ; magic number
mov r9, r1 ; r9 is -1 or 0
xor r0, r1 ; complement the coefficient if negative
and r9, 1 ; r9 is 1 or 0
add r0, r9 ; r0 is absolute value of coefficient
add r8, 1 ; add 1 to the exponent
mul r11 ; multiply abs(coefficient) by magic number
mov r0, r2 ; r0 is the product shift 64 bits
shr r0, 3 ; r0 is divided by 8: abs(coefficient) / 10
xor r0, r1 ; complement coefficient if it was negative
add r0, r9 ; coefficient's sign is restored
jmp pack ; start over
pad
pack_increase:
mov r10, power
mov r10, [r10+r9*8] ; r10 is 10^r9
cmp r9, 20 ; is the difference more than 20?
jae return_zero ; if so, the result is zero (rare)
mov r11, r10 ; r11 is the power of ten
neg r11 ; r11 is the negation of the power of ten
test r0, r0 ; examine the sign of the coefficient
cmovns r11, r10 ; r11 has the sign of the coefficient
sar r11, 1 ; r11 is half the signed power of ten
add r0, r11 ; r0 is coefficient plus the rounding fudge
cqo ; sign extend r0 into r2
idiv r10 ; divide by the power of ten
add r8, r9 ; increase the exponent
jmp pack ; start over
pad
pack_decrease:
; The exponent is too big. We can attempt to reduce it by scaling back.
; This can salvage values in a small set of cases.
imul r0, 10 ; try multiplying the coefficient by 10
jo return_null ; if it overflows, we failed to salvage
sub r8, 1 ; decrement the exponent
test r8, -128 ; is the exponent still over 127?
jnz pack_decrease ; until the exponent is less than 127
mov r9, r0 ; r9 is the coefficient
sar r9, 56 ; r9 is top 8 bits of the coefficient
adc r9, 0 ; add the ninth bit
jnz return_null ; the number is still too large
shl r0, 8 ; shift the exponent into position
cmovz r8, r0 ; if coefficient is zero, also zero exponent
movzx r8, r8_b ; zero out all but bottom 8 bits of exponent
or r0, r8 ; mix in the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_round: function_with_two_parameters
;(number: dec64, place: dec64) returns quantization: dec64
; The place argument indicates at what decimal place to round.
; -2 nearest cent
; 0 nearest integer
; 3 nearest thousand
; 6 nearest million
; 9 nearest billion
; The place should be between -16 and 16.
pad
round_begin:
cmp r1_b, 128 ; is the number nan?
jz return_null
test r2_b, r2_b ; is places already an integer?
jnz round_places ; nope
mov r10, r1 ; r10 is the number
mov r11, r2 ; r11 is the places
movsx r8, r10_b ; r8 is the current exponent
mov r0, r10 ; r0 is the coefficient
sar r11, 8 ; r11 is place: int64
sar r0, 8 ; r0 is the coefficient of the number
jz return_zero ; if the coefficient is 0, the result is 0
cmp r8, r11 ; compare the exponents
jge pack ; no rounding required
mov r1, r0
mov r9, eight_over_ten ; magic
neg r1 ; r1 is -coefficient
cmovns r0, r1 ; r0 is abs(coefficient)
pad
round_loop:
; Increment the exponent and divide the coefficient by 10 until the target
; exponent is reached. The division is accomplished by multiplying with a
; scaled reciprocal.
mul r9 ; r2 is the coefficient * 8 / 10
mov r0, r2 ; r0 is the coefficient * 8 / 10
sar r0, 3 ; r0 is the coefficient / 10
add r8, 1 ; increment the exponent
cmp r8, r11 ; compare the exponent with the target
jne round_loop ; loop if the exponent is not at the target
; Round if necessary and return the result.
shr r2, 2 ; isolate the carry bit
and r2, 1 ; r2 is 1 if rounding is needed
add r0, r2 ; r0 is rounded
mov r1, r0
neg r1
test r10, r10 ; was the original number negative?
cmovs r0, r1
jmp pack ; pack it up
pad
round_places:
; Places does not seem to be an integer. If it is nan, then default to zero.
cmp r2_b, 128
jne round_normal
xor r2, r2
jmp round_begin
pad
round_normal:
mov r10, r1 ; save the number
mov r1, r2 ; pass the place
call_with_one_parameter dec64_normal
test r0_b, r0_b ; is the number of places a normal integer?
jnz return_null
mov r1, r10
mov r2, r0
jmp round_begin
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_add: function_with_two_parameters
;(augend: dec64, addend: dec64) returns sum: dec64
; Add two dec64 numbers together.
; If the two exponents are both zero (which is usually the case for integers)
; we can take the fast path. Since the exponents are both zero, we can simply
; add the numbers together and check for overflow.
pad
add_begin:
mov r0, r1 ; r0 is the first number
or r1_b, r2_b ; r1_b is the two exponents or'd together
jnz add_slow ; if either exponent is not zero, slow path
add r0, r2 ; add the shifted coefficients together
jo add_overflow ; if there was no overflow, we are done
ret ; no need to pack
pad
add_overflow:
; If there was an overflow (extremely unlikely) then we must make it fit.
; pack knows how to do that.
rcr r0, 1 ; divide the sum by 2 and repair its sign
movsx r8, r1_b ; r8 is the exponent
sar r0, 7 ; r0 is the coefficient of the sum
jmp pack ; pack it up
pad
add_slow:
; The slow path is taken if the two operands do not both have zero exponents.
mov r1, r0 ; restore r1
cmp r0_b, 128 ; is the first operand nan?
je return_null ; if nan, get out
; Are the two exponents the same? This will happen often, especially with
; money values.
cmp r1_b, r2_b ; compare the two exponents
jne add_slower ; if not equal, take the slower path
; The exponents match so we may add now. Zero out the exponents so there
; will be no carry into the coefficients when the coefficients are added.
; If the result is zero, then return the normal zero.
and r0, -256 ; remove the exponent
and r2, -256 ; remove the other exponent
add r0, r2 ; add the shifted coefficients
jo add_overflow ; if it overflows, it must be repaired
cmovz r1, r0 ; if coefficient is zero, exponent is zero
movzx r1, r1_b ; mask the exponent
or r0, r1 ; mix in the exponent
ret ; no need to pack
pad
add_slower:
; The slower path is taken when neither operand is nan, and their
; exponents are different. Before addition can take place, the exponents
; must be made to match. Swap the numbers if the second exponent is greater
; than the first.
cmp r2_b, 128 ; Is the second operand nan?
je return_null
cmp r1_b, r2_b ; compare the exponents
mov r0, r1 ; r0 is the first number
cmovl r1, r2 ; r1 is the number with the larger exponent
cmovl r2, r0 ; r2 is the number with the smaller exponent
; Shift the coefficients of r1 and r2 into r10 and r11 and unpack the exponents.
mov r10, r1 ; r10 is the first number
mov r11, r2 ; r11 is the second number
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
sar r10, 8 ; r10 is the first coefficient
sar r11, 8 ; r11 is the second coefficient
mov r0, r10 ; r0 is the first coefficient
pad
add_slower_decrease:
; The coefficients are not the same. Before we can add, they must be the same.
; We will try to decrease the first exponent. When we decrease the exponent
; by 1, we must also multiply the coefficient by 10. We can do this as long as
; there is no overflow. We have 8 extra bits to work with, so we can do this
; at least twice, possibly more.
imul r0, 10 ; before decrementing the exponent, multiply
jo add_slower_increase
sub r8, 1 ; decrease the first exponent
mov r10, r0 ; r10 is the enlarged first coefficient
cmp r8, r9 ; are the exponents equal yet?
jg add_slower_decrease
mov r0, r11 ; r0 is the second coefficient
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack ; pack it up
pad
add_slower_increase:
; We cannot decrease the first exponent any more, so we must instead try to
; increase the second exponent, which will result in a loss of significance.
; That is the heartbreak of floating point.
; Determine how many places need to be shifted. If it is more than 17, there is
; nothing more to add.
mov r2, r8 ; r2 is the first exponent
sub r2, r9 ; r2 is the remaining exponent difference
mov r0, r11 ; r0 is the second coefficient
cmp r2, 17 ; 17 is the max digits in a packed coefficient
ja return_r1 ; too small to matter
mov r9, power
mov r9, [r9+r2*8] ; r9 is the power of ten
cqo ; sign extend r0 into r2
idiv r9 ; divide second coefficient by power of 10
test r0, r0 ; examine the scaled coefficient
jz return_r1 ; too insignificant to add?
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack
pad
return_r1:
mov r0, r1 ; r0 is the original number
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_ceiling: function_with_one_parameter
;(number: dec64) returns integer: dec64
; Produce the smallest integer that is greater than or equal to the number. In
; the result, the exponent will be greater than or equal to zero unless it is
; nan. Numbers with positive exponents will not be modified, even if the
; numbers are outside of the safe integer range.
; Preserved: r11.
mov r9, 1 ; r9 is the round up flag
jmp floor_begin
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_floor: function_with_one_parameter
;(number: dec64) returns integer: dec64
; Produce the largest integer that is less than or equal to the number. This
; is sometimes called the entier function. In the result, the exponent will be
; greater than or equal to zero unless it is nan. Numbers with positive
; exponents will not be modified, even if the numbers are outside of the safe
; integer range.
; Preserved: r11.
mov r9, -1 ; r9 is the round down flag
pad
floor_begin:
cmp r1_b, 128
je return_null
mov r0, r1 ; r0 is the number
movsx r8, r1_b ; r8 is the exponent
sar r0, 8 ; r0 is the coefficient
cmovz r1, r0 ; if coefficient is zero, the number is zero
neg r8 ; r8 is the negated exponent
test r1_b, r1_b ; examine the exponent
jns return_r1 ; nothing to do unless exponent was negative
cmp r8, 17 ; is the exponent is too extreme?
jae floor_micro ; deal with a micro number
mov r10, power
mov r10, [r10+r8*8] ; r10 is the power of ten
cqo ; sign extend r0 into r2
idiv r10 ; divide r2:r0 by 10
test r2, r2 ; examine the remainder
jnz floor_remains ; deal with the remainder
shl r0, 8 ; pack the coefficient
ret
pad
floor_micro:
mov r2, r0 ; r2 is the coefficient
xor r0, r0 ; r0 is zero
pad
floor_remains:
; If the remainder is negative and the rounding flag is negative, then we need
; to decrement r0. But if the remainder and the rounding flag are both
; positive, then we need to increment r0.
xor r10, r10 ; r10 is zero
xor r2, r9 ; xor the remainder and the rounding
cmovs r9, r10 ; if different signs, clear the rounding
add r0, r9 ; add the rounding to the coefficient
shl r0, 8 ; pack the coefficient
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_subtract: function_with_two_parameters
;(minuend: dec64, subtrahend: dec64) returns difference: dec64
; Subtract the dec64 number in r2 from the dec64 number in r1.
; The result is in r0.
; This is the same as dec64_add, except that the operand in r2 has its
; coefficient complemented first.
xor r2, -256 ; not the coefficient only
add r2, 256 ; two's complement increment of coefficient
jno add_begin ; if there is no overflow, begin the beguine
; The subtrahend coefficient is -36028797018963968. This value cannot easily be
; complemented, so take the slower path. This should be extremely rare.
cmp r1_b, 128 ; is the first operand nan
sete r0_b
cmp r2_b, 128 ; is the second operand nan?
sete r0_h
or r0_b, r0_h ; is either nan?
jnz return_null
mov r10, r1 ; r10 is the first coefficient
movsx r8, r1_b ; r8 is the first exponent
sar r10, 8
movsx r9, r2_b ; r9 is the second exponent
mov r11, 80000000000000H ; r11 is 36028797018963968
mov r0, r10 ; r0 is the first coefficient
cmp r8, r9 ; if the second exponent is larger, swap
jge subtract_slower_decrease_compare
mov r0, r11 ; r0 is the second coefficient
xchg r8, r9 ; swap the exponents
xchg r10, r11 ; swap the coefficients
jmp subtract_slower_decrease_compare
pad
subtract_slower_decrease:
; The coefficients are not the same. Before we can add, they must be the same.
; We will try to decrease the first exponent. When we decrease the exponent
; by 1, we must also multiply the coefficient by 10. We can do this as long as
; there is no overflow. We have 8 extra bits to work with, so we can do this
; at least twice, possibly more.
imul r0, 10 ; before decrementing the exponent, multiply
jo subtract_slower_increase
sub r8, 1 ; decrease the first exponent
mov r10, r0 ; r10 is the enlarged first coefficient
pad
subtract_slower_decrease_compare:
cmp r8, r9 ; are the exponents equal yet?
jg subtract_slower_decrease
mov r0, r11 ; r0 is the second coefficient
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack ; pack it up
pad
subtract_slower_increase:
; We cannot decrease the first exponent any more, so we must instead try to
; increase the second exponent, which will result in a loss of significance.
; That is the heartbreak of floating point.
; Determine how many places need to be shifted. If it is more than 17, there is
; nothing more to add.
mov r2, r8 ; r2 is the first exponent
sub r2, r9 ; r2 is the remaining exponent difference
mov r0, r11 ; r0 is the second coefficient
cmp r2, 17 ; 17 is the max digits in packed coefficient
ja subtract_underflow ; too small to matter
mov r9, power
mov r9, [r9+r2*8] ; r9 is the power of ten
cqo ; sign extend r0 into r2
idiv r9 ; divide second coefficient by power of 10
; The exponents are now equal, so the coefficients may be added.
add r0, r10 ; add the two coefficients
jmp pack
pad
subtract_underflow:
mov r0, r10 ; r0 is the first coefficient
jmp pack
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_multiply: function_with_two_parameters
;(multiplicand: dec64, multiplier: dec64) returns product: dec64
; Multiply two dec64 numbers together.
; Unpack the exponents into r8 and r9.
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
; Set flags in r0 if either operand is nan.
cmp r1_b, 128 ; is the first operand nan?
sete r0_b ; r0_b is 1 if the first operand is nan
cmp r2_b, 128 ; is the second operand nan?
sete r0_h ; r0_h is 1 if the second operand is nan
; Unpack the coefficients. Set flags in r1 if either is not zero.
sar r1, 8 ; r1 is the first coefficient
mov r10, r1 ; r10 is the first coefficient
setnz r1_b ; r1_b is 1 if first coefficient is not zero
sar r2, 8 ; r2 is the second coefficient
setnz r1_h ; r1_h is 1 if second coefficient is not 0
; The result is nan if one or both of the operands is nan and neither of the
; operands is zero.
or r1_w, r0_w ; is either coefficient zero and not nan?
xchg r1_b, r1_h
test r0_w, r1_w
jnz return_null
mov r0, r10 ; r0 is the first coefficient
add r8, r9 ; r8 is the product exponent
imul r2 ; r2:r0 is r1 * r2
jno pack ; if product fits in 64 bits, start packing
; There was overflow.
; Make the 110 bit coefficient in r2:r0Er8 all fit. Estimate the number of
; digits of excess, and increase the exponent by that many digits.
; We use 77/256 to convert log2 to log10.
mov r9, r2 ; r9 is the excess significance
xor r1, r1 ; r1 is zero anticipating bsr
neg r9
cmovs r9, r2 ; r9 is abs of excess significance
bsr r1, r9 ; find the position of most significant bit
imul r1, 77 ; multiply the bit number by 77/256 to
shr r1, 8 ; convert a bit number to a digit number
add r1, 2 ; add two extra digits to the scale
add r8, r1 ; increase the exponent
mov r9, power
idiv qword [r9+r1*8] ; divide by the power of ten
jmp pack
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_divide: function_with_two_parameters
;(dividend: dec64, divisor: dec64) returns quotient: dec64
; Divide a dec64 number by another.
; Begin unpacking the components.
cmp r2, 200h
jz divide_two
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
mov r10, r1 ; r10 is the first number
mov r11, r2 ; r11 is the second number
; Set nan flags in r0.
cmp r1_b, 128 ; is the first operand nan?
sete r0_b ; r0_b is 1 if the first operand is nan
cmp r2_b, 128 ; is the second operand nan?
sete r0_h ; r0_h is 1 if the second operand is nan
sar r10, 8 ; r10 is the dividend coefficient
setnz r1_b ; r1_b is 1 if dividend coefficient is not 0
or r1_b, r0_b ; r0_b is 1 if either is nan or divisor is 0
jz return_zero ; if the dividend is zero, quotient is zero
sar r11, 8 ; r11 is the divisor coefficient
setz r1_b ; r1_h is 1 if dividing by zero
or r0_h, r0_b ; r0_h is 1 if either is nan
or r1_b, r0_h ; r1_b is zero if dividend is zero & not nan
jnz return_null
sub r8, r9 ; r8 is the quotient exponent
pad
divide_measure:
; We want to get as many bits into the quotient as possible in order to capture
; enough significance. But if the quotient has more than 64 bits, then there
; will be a hardware fault. To avoid that, we compare the magnitudes of the
; dividend coefficient and divisor coefficient, and use that to scale the
; dividend to give us a good quotient.
mov r0, r10 ; r0 is the first coefficient
mov r1, r11 ; r1 is the second coefficient
neg r0 ; r0 is negated
cmovs r0, r10 ; r0 is abs of dividend coefficient
neg r1 ; r1 is negated
cmovs r1, r11 ; r1 is abs of divisor coefficient
bsr r0, r0 ; r0 is the dividend most significant bit
bsr r1, r1 ; r1 is the divisor most significant bit
; Scale up the dividend to be approximately 58 bits longer than the divisor.
; Scaling uses factors of 10, so we must convert from a bit count to a digit
; count by multiplication by 77/256 (approximately LN2/LN10).
add r1, 58 ; we want approx 58 bits in raw quotient
sub r1, r0 ; r1 is the number of bits to add to the dividend
imul r1, 77 ; multiply by 77/256 to convert bits|digits
shr r1, 8 ; r1 is number of digits to scale dividend
; The largest power of 10 that can be held in an int64 is 1e18.
cmp r1, 18 ; prescale the dividend if 10**r1 won't fit
jg divide_prescale
; Multiply the dividend by the scale factor, and divide that 128 bit result by
; the divisor. Because of the scaling, the quotient is guaranteed to use most
; of the 64 bits in r0, and never more. Reduce the final exponent by the number
; of digits scaled.
mov r0, r10 ; r0 is the dividend coefficient
mov r9, power
imul qword [r9+r1*8] ; r2:r0 is the dividend coefficient * 10**r1
idiv r11 ; r0 is the quotient
sub r8, r1 ; r8 is the exponent
jmp pack ; pack it up
pad
divide_prescale:
; If the number of scaling digits is larger than 18, then we will have to
; scale in two steps: first prescaling the dividend to fill a register, and
; then repeating to fill a second register. This happens when the divisor
; coefficient is much larger than the dividend coefficient.
mov r1, 58 ; we want 58 bits or so in the dividend
sub r1, r0 ; r1 is the number of additional bits needed
imul r1, 77 ; convert bits to digits
shr r1, 8 ; shift 8 is cheaper than div 256
mov r9, power
imul r10, qword [r9+r1*8] ; multiply the dividend by power of ten
sub r8, r1 ; reduce the exponent
jmp divide_measure ; try again
divide_two:
; Divide a dec64 number by two.
cmp r1_b, 128
je return_null
test r1_h, 1
jz divide_half
; Unpack the components into r8 and r1, multiply by 5 and divide by 10.
movsx r8, r1_b ; r8 is the exponent
sar r1, 8 ; r1 is the coefficient
sub r8, 1 ; bump down the exponent
lea r0, [r1+r1*4] ; r0 is the coefficient * 5
jmp pack
pad
divide_half:
; If the least significant bit of the coefficient is 0, then we can do this
; the fast way. Shift the coefficient by 1 bit and restore the exponent. If
; the shift produces zero, even easier.
mov r0, -256 ; r0 is the coefficient mask
and r0, r1 ; r0 is the coefficient shifted 8 bits
jz return
sar r0, 1 ; r0 is divided by 2
movzx r1, r1_b ; zero out r1 except lowest 8 bits
or r0, r1 ; mix in the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_integer_divide: function_with_two_parameters
;(dividend: dec64, divisor: dec64) returns quotient: dec64
; Divide, with a floored integer result. It produces the same result as
; dec64_floor(dec64_divide(dividend, divisor))
; but can sometimes produce that result more quickly.
cmp r1_b, r2_b ; are the exponents equal?
jne integer_divide_slow
mov r0, r1 ; r0 is the dividend
mov r11, r2 ; r11 is the divisor
sar r1, 8 ; r1 is the dividend coefficient
setnz r2_h ; r2_h is 1 if dividend coefficient is not 0
cmp r0_b, 128 ; are the operands nan?
sete r2_b ; r2_b is 1 if the operands are nan
and r0, -256 ; zero the dividend's exponent
or r2_h, r2_b ; r2_h is zero if dividend is 0 and not nan
jz return_zero ; the quotient is zero if dividend is zero
sar r11, 8 ; r11 is the divisor coefficient
setz r2_h ; r2_h is 1 if divisor coefficient is zero
or r2_b, r2_h ; r2_b is 1 if the result is nan
jnz return_null
cqo ; sign extend r0 into r2
idiv r11 ; r0 is the quotient
and r0, -256 ; zero the exponent again
ret ; no need to pack
pad
integer_divide_slow:
; The exponents are not the same, so do it the hard way.
call_with_two_parameters dec64_divide
mov r1, r0
tail_with_one_parameter dec64_floor
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_modulo: function_with_two_parameters
;(dividend: dec64, divisor: dec64) returns modulus: dec64
; Modulo. It produces the same result as
; dec64_subtract(
; dividend,
; dec64_multiply(
; dec64_integer_divide(dividend, divisor),
; divisor
; )
; )
cmp r1_b, r2_b ; are the two exponents the same?
jnz modulo_slow ; if not take the slow path
mov r0, r1 ; r0 is the dividend
mov r11, r2 ; r11 is the divisor
cmp r1_b, 128 ; is the first operand nan?
setne r2_h ; r2_h is 1 if the operands are not nan
sar r0, 8 ; r0 is the dividend coefficient
setz r2_b ; r2_b is 1 if dividend coefficient is zero
test r2_b, r2_h ; is the dividend is zero and not nan?
jnz return_zero ; quotient is zero if the dividend is zero
sar r11, 8 ; r11 is the divisor coefficient
setnz r2_b ; r2_b is 1 if divisor coefficient is not 0
test r2_h, r2_b ; is either operand nan or is divisor zero?
jz return_null
cqo ; sign extend r0 into r2
idiv r11 ; divide r2:r0 by the divisor
; If the signs of the divisor and remainder are different and the remainder is
; not zero, add the divisor to the remainder.
xor r0, r0 ; r0 is zero
mov r10, r2 ; r10 is the remainder
test r2, r2 ; examine the remainder
cmovz r11, r0 ; r11 is zero if the remainder is zero
xor r10, r11 ; r10 is remainder xor divisor
cmovs r0, r11 ; r0 is divisor if the signs were different
add r0, r2 ; r0 is the corrected result
cmovz r1, r0 ; if r0 is zero, so is the exponent
shl r0, 8 ; position the coefficient
mov r0_b, r1_b ; mix in the exponent
ret
pad
modulo_slow:
; The exponents are not the same, so do it the hard way.
push r1 ; save the dividend
push r2 ; save the divisor
call_with_two_parameters dec64_integer_divide
pop r2
mov r1, r0
call_with_two_parameters dec64_multiply
pop r1
mov r2, r0
tail_with_two_parameters dec64_subtract
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_signum: function_with_one_parameter
;(number: dec64) returns signature: dec64
; If the number is nan, the result is nan.
; If the number is less than zero, the result is -1.
; If the number is zero, the result is 0.
; If the number is greater than zero, the result is 1.
cmp r1_b, 128
jz return_null
mov r0, r1 ; r0 is the number
sar r0, 8 ; r0 is the coefficient
mov r8, r0 ; r8 is the coefficient too
neg r0 ; r0 is -coefficient
sar r8, 63 ; r8 is extended sign: -1 if negative else 0
shr r0, 63 ; r0 is 1 if the number was greater than 0
or r0, r8 ; r0 is -1, 0, or 1
shl r0, 8 ; package the number with a zero exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_neg: function_with_one_parameter
;(number: dec64) returns negation: dec64
; Negate a number. We need to negate the coefficient without changing the
; exponent.
cmp r1_b, 128 ; compare the exponent to nan
je return_null ; is the number nan?
mov r2, r1 ; r2 is the number
mov r0, -256 ; r0 is the coefficient mask
sar r2, 8 ; r2 is the coefficient
cmovz r1, r2 ; if coefficient is zero, then exponent too
xor r0, r1 ; r0 has exponent and complemented coef
add r0, 256 ; r0 has exponent and negated coefficient
jo neg_overflow ; nice day if it don't overflow
ret
neg_overflow:
; The coefficient is -36028797018963968, which is the only coefficient that
; cannot be trivially negated. So we do this the hard way.
mov r0, r2
movsx r8, r1_b ; r8 is the exponent
neg r0
jmp pack
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_abs: function_with_one_parameter
;(number: dec64) returns absolution: dec64
; Find the absolute value of a number. If the number is negative, hand it off
; to dec64_neg. Otherwise, return the number unless it is nan or zero.
cmp r1_b, 128 ; is the number nan?
je return_null
test r1, r1 ; examine r1
js dec64_neg ; is the number negative?
mov r0, r1 ; r0 is the number
sar r1, 8 ; r1 is the coefficient
cmovz r0, r1 ; if coefficient is zero, result is zero
ret ; return the number
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_equal: function_with_two_parameters
;(comparahend: dec64, comparator: dec64) returns comparison: dec64
; Compare two dec64 numbers. If they are equal, return 1, otherwise return 0.
; Denormal zeroes are equal but denormal nans are not.
; If the numbers are trivally equal, then return 1.
cmp r1, r2 ; compare the two numbers
je return_true
; If the comparator is nan, then we ask dec64_is_nan.
cmp r2_b, 128
je dec64_is_nan
; If the exponents match or if their signs are different, then return false.
mov r0, r1 ; r0 is the first number
xor r0, r2 ; r0 the xor of the two numbers
sets r0_h ; r0_h is 1 if the signs are different
cmp r1_b, r2_b ; compare the two exponents
sete r0_b ; r0_b is 1 if the exponents are the same
or r0_b, r0_h
jnz return_false
; Do it the hard way by subtraction. Is the difference zero?
call_with_two_parameters dec64_subtract ; r0 is r1 - r2
cmp r0_b, 128 ; is the difference nan?
je return_false
or r0, r0 ; examine the difference
jz return_true
jmp return_false
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_normal: function_with_one_parameter
;(number: dec64) returns normalization: dec64
; Make the exponent as close to zero as possible without losing any signficance.
; Usually normalization is not needed since it does not materially change the
; value of a number.
; Preserves r10 and r11.
mov r0, r1 ; r0 is the number
cmp r1_b, 128 ; compare the exponent to nan
jz return_null ; if exponent is nan, the result is nan
and r0, -256 ; r0 is the coefficient shifted 8 bits
mov r8, 10 ; r8 is the divisor
cmovz r1, r0 ; r1 is zero if r0 is zero
mov r9, r0 ; r9 is the coefficient shifted 8 bits
test r1_b, r1_b ; examine the exponent
jz return ; if the exponent is zero, return r0
jns normal_multiply ; if the exponent is positive
sar r0, 8 ; r0 is the coefficient
sar r9, 8 ; r9 is the coefficient
pad
normal_divide:
; While the exponent is less than zero, divide the coefficient by 10 and
; increment the exponent.
cqo ; sign extend r0 into r2
idiv r8 ; divide r2:r0 by 10
test r2, r2 ; examine the remainder
jnz normal_divide_done ; if r2 is not zero, we are done
mov r9, r0 ; r9 is the coefficient
add r1_b, 1 ; increment the exponent
jnz normal_divide ; until the exponent is zero
pad
normal_divide_done:
mov r0, r9 ; r0 is the finished coefficient
shl r0, 8 ; put it in position
mov r0_b, r1_b ; mix in the exponent
ret
pad
normal_multiply:
; While the exponent is greater than zero, multiply the coefficient by 10 and
; decrement the exponent. If the coefficient gets too large, wrap it up.
imul r0, 10 ; r0 is r0 * 10
jo normal_multiply_done ; return zero if overflow
mov r9, r0 ; r9 is the coefficient
sub r1_b, 1 ; decrement the exponent
jnz normal_multiply ; until the exponent is zero
ret
pad
normal_multiply_done:
mov r0, r9 ; r0 is the finished positioned coefficient
mov r0_b, r1_b ; mix in the exponent
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
return:
; Return whatever is in r0.
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
return_null:
; All of the dec64_ functions return only this form of nan.
mov r0, null
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
return_one:
mov r0, 0100h ; one
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
return_zero:
xor r0, r0 ; zero
ret
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_less: function_with_two_parameters
;(comparahend: dec64, comparator: dec64) returns comparison: dec64
; Compare two dec64 numbers. If the first is less than the second, return true,
; otherwise return false. Any nan value is greater than any number value
; The other 3 comparison functions are easily implemented with dec64_is_less:
; dec64_is_greater(a, b) => dec64_is_less(b, a)
; dec64_is_greater_or_equal(a, b) => dec64_is_false(dec64_is_less(a, b))
; dec64_is_less_or_equal(a, b) => dec64_is_false(dec64_is_less(b, a))
; If the exponents are the same, then do a simple compare.
cmp r1_b, r2_b ; are the two exponents equal?
jne less_slow ; deal with unequal exponents
cmp r1, r2 ; compare the numbers
jge return_false ; greater or equal
cmp r1_b, 128 ; are the arguments nan?
jne return_true ; less than
jmp return_false ; nan is not less than nan
pad
less_slow:
; The exponents are not the same.
cmp r1_b, 128 ; is the first argument nan?
je return_false ; nan is greater than numbers
cmp r2_b, 128 ; is the second argument nan?
je return_true ; numbers are less than nan
; Unpack the numbers.
mov r11, r2 ; r11 is the second number
movsx r8, r1_b ; r8 is the first exponent
movsx r9, r2_b ; r9 is the second exponent
mov r0, power ; r0 is the power array
mov r2, 18 ; r2 is 18
sar r1, 8 ; r1 is the first coefficient
sar r11, 8 ; r11 is the second coefficient
; The maximum cofficient is 36028797018963967. 10**18 is more than that.
sub r8, r9 ; r8 is the exponent difference
js less_second ; is the difference negative?
cmp r8, r2 ; compare the exponent difference to 18
cmovge r8, r2 ; r8 is min(exponent difference, 18)
; We need to make them conform before we can compare. Multiply the first
; coefficient by 10**(first exponent - second exponent)
mov r0, [r0+r8*8] ; r0 is power[difference]
imul r1 ; r2, r0 is the amplified first coefficient
jo less_overflow_first
cmp r0, r11 ; is first coefficient less than the other?
jl return_true
jmp return_false
pad
less_second:
neg r8
cmp r8, r2 ; compare the exponent difference to 18
cmovge r8, r2 ; r8 is min(exponent difference, 18)
mov r0, [r0+r8*8] ; r0 is 10 ** min(exponent difference, 18)
imul r11 ; r2, r0 is the amplified second coefficient
jo less_overflow_second
cmp r1, r0 ; is first coefficient less than the other?
jl return_true
jmp return_false
pad
less_overflow_first:
test r2, r2
jns return_false
jmp return_true
pad
less_overflow_second:
test r2, r2
jns return_true
jmp return_false
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_integer: function_with_one_parameter
;(number: dec64) returns comparison: dec64
; If the number contains a non-zero fractional part or if it is nan, return
; false. Otherwise, return true.
cmp r1_b, 128 ; nan exponent?
je return_false ; nan is not an integer
mov r0, r1 ; r0 is the number
sar r0, 8 ; r0 is the coefficient
jz return_true ; zero is an integer
test r1_b, r1_b ; examine the exponent
jns return_true ; a positive exponent means an integer
cmp r1_b, -17 ; huge negative exponents can never be int
jl return_false
movsx r8, r1_b ; r8 is the first exponent
neg r8 ; negate the exponent
mov r9, power
mov r10, [r9+r8*8] ; r10 is 10^-exponent
cqo ; sign extend r0 into r2
idiv r10 ; divide r2:r0 by the power of ten
test r2, r2 ; examine the remainder
jz return_true ; if the remainder is zero, then return true
jmp return_false
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_nan: function_with_one_parameter
;(number: dec64) returns comparison: dec64
cmp r1_b, 128 ; is r1 nan?
je return_true
jmp return_false
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_zero: function_with_one_parameter
;(number: dec64) returns comparison: dec64
cmp r1_b, 128 ; is r1 nan?
setne r0_b ; r0_b is 1 if not nan
sar r1, 8 ; r1 is the coefficient
setz r0_h ; r0_h is 1 if zero
test r0_b, r0_h
jnz return_true
jmp return_false
pad; -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
dec64_is_false: function_with_one_parameter
;(boolean: dec64) returns notation: dec64
; If the argument is false, the result is true.
; Otherwise, the result is false.
mov r0, false
cmp r1, r0
je return_true
return_false:
mov r0, false
ret
return_true:
mov r0, true
ret
#!/bin/bash
# This script builds and runs the DEC64 tests for an ARM64 Android device. It
# takes no arguments.
# DEC64's ARM source must be tweaked slightly to make it compatible with
# the Android NDK's clang:
# 0. Comments start with a double slash, not a semicolon.
# 1. The global directive, like all directives, is preceeded by a period.
# 2. The area directive is not supported. We do, however, align.
# 3. Labels must be suffixed with a colon.
# 4. An 8 byte constant is declared with .quad rather than dcq.
# 5. When shifting an address offset, an extra comma is required.
# 6. The end directive is unnecessary.
sed \
-E \
-e "s_;_//_g" \
-e "s/global ([a-z0-9_]+) \\[func\\]/.global \1/g" \
-e "s/area dec64, align=8, code, readonly/.align 8/g" \
-e "s/^([a-z0-9_]+)/\1:/g" \
-e "s/dcq/.quad/g" \
-e "s/x([0-9]) lsl 3/x\1, lsl 3/g" \
-e "/ end/d" \
<dec64.s \
>dec64.android.s \
|| exit
build_and_run() {
# Compile, assemble and link a source file into a binary executable, then
# transfer and run that executable on a connected Android device.
src=$1
bin=$2
# The tests shift negative integers on purpose, so we suppress that warning.
"NDK/toolchains/llvm/prebuilt/darwin-x86_64/bin/aarch64-linux-android32-clang" \
-Wno-shift-negative-value \
-o $bin \
dec64_string.c \
dec64_math.c \
dec64.android.s \
$src \
&& adb push ./$bin /data/local/tmp/$bin \
&& adb shell /data/local/tmp/$bin
}
build_and_run dec64_test.c dec64_test \
&& build_and_run dec64_string_test.c dec64_string_test \
&& build_and_run dec64_math_test.c dec64_math_test \
|| exit
# This script has been tested on a Samsung A52 5G device running Android 12,
# using the Android NDK r25b.
#!/bin/sh
# This script builds and runs the DEC64 tests for an x64 machine running MacOS
# (Darwin). It takes no arguments.
# Assemble DEC64 into dec64.o. We prefix the function names with an underscore,
# because that is what clang expects.
nasm \
-f macho64 \
--prefix _ \
dec64.nasm \
|| exit
build() {
# Compile, assemble and link a source file into a binary executable.
# We opt out of creating a position independent executable (a PIE), because we
# depend on absolute addressing. Without passing the linker the -no_pie flag,
# we get a warning like
# PIE disabled. Absolute addressing (perhaps -mdynamic-no-pic) not allowed in
# code signed PIE, but used in pack_increase from dec64.o.
# because of lines in dec64.nasm like
# mov r10, [r10+r9*8]
# The tests shift negative integers on purpose, so we suppress that warning.
src=$1
bin=$2
clang \
-Wl,-no_pie \
-Wno-shift-negative-value \
-o $bin \
dec64_string.c \
dec64_math.c \
dec64.o \
$src
}
# Build the tests.
build dec64_test.c dec64_test \
&& build dec64_string_test.c dec64_string_test \
&& build dec64_math_test.c dec64_math_test \
|| exit
# Run the tests.
./dec64_test \
&& ./dec64_string_test \
&& ./dec64_math_test \
|| exit
# This script has been tested on MacOS 11.4 (Big Sur), using NASM
# v2.15.05 and clang 13.0.0.
#!/bin/bash
# This script builds and runs the DEC64 tests for a Macintosh with an Apple
# Silicon processor. It takes no arguments.
# DEC64's ARM source must be tweaked slightly to make it compatible with clang:
# 0. Comments start with a double slash, not a semicolon.
# 1. The global directive, like all directives, is preceeded by a period.
# 2. The area directive is not supported. We do, however, align.
# 3. Labels must be suffixed with a colon.
# 4. An 8 byte constant is declared with .quad rather than dcq.
# 5. When shifting an address offset, an extra comma is required.
# 6. The end directive is unnecessary.
# 7. The names of entrypoints require a leading underscore to link correctly.
sed \
-E \
-e "s_;_//_g" \
-e "s/global ([a-z0-9_]+) \\[func\\]/.global \1/g" \
-e "s/area dec64, align=8, code, readonly/.align 8/g" \
-e "s/^([a-z0-9_]+)/\1:/g" \
-e "s/dcq/.quad/g" \
-e "s/x([0-9]) lsl 3/x\1, lsl 3/g" \
-e "/ end/d" \
-e "s/dec64_/_dec64_/g" \
<dec64.s \
>dec64.silicon.s \
|| exit
build_and_run() {
# Compile, assemble and link a source file into a binary executable, then
# run it.
src=$1
bin=$2
# The tests shift negative integers on purpose, so we suppress that warning.
clang \
-Wno-shift-negative-value \
-o $bin \
dec64.silicon.s \
dec64_string.c \
dec64_math.c \
$src \
&& ./$bin
}
build_and_run dec64_test.c dec64_test \
&& build_and_run dec64_string_test.c dec64_string_test \
&& build_and_run dec64_math_test.c dec64_math_test \
|| exit
# This script has been tested on a 2021 MacBook Pro with an M1 Pro processor.
#!/bin/sh
# This script builds and runs the DEC64 tests for an x64 machine running Linux.
# It takes no arguments.
# Assemble DEC64 into dec64.o.
nasm -f elf64 dec64.nasm || exit
build() {
# Compile, assemble and link a source file into a binary executable.
# We opt out of creating a position independent executable (a PIE), because we
# depend on absolute addressing. Without passing the the -no-pie flag, we get a
# warning like
# /usr/bin/ld: dec64.o: warning: relocation in read-only section `.text'
# /usr/bin/ld: warning: creating DT_TEXTREL in a PIE
# because of lines in dec64.nasm like
# mov r10, [r10+r9*8]
src=$1
bin=$2
gcc \
-no-pie \
-o $bin \
dec64_string.c \
dec64_math.c \
dec64.o \
$src
}
# Build the tests.
build dec64_test.c dec64_test \
&& build dec64_string_test.c dec64_string_test \
&& build dec64_math_test.c dec64_math_test \
|| exit
# Run the tests.
./dec64_test \
&& ./dec64_string_test \
&& ./dec64_math_test \
|| exit
# This script has been tested on Debian GNU/Linux 11 (Bullseye), using NASM
# v2.15.05 and gcc v10.2.1.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment