Skip to content

Instantly share code, notes, and snippets.

@markcmarino
Created February 20, 2020 22:52
Show Gist options
  • Save markcmarino/934e24a74fa59b762221d0a7e00f7ec8 to your computer and use it in GitHub Desktop.
Save markcmarino/934e24a74fa59b762221d0a7e00f7ec8 to your computer and use it in GitHub Desktop.
The matrices.s assembly code written by Friedrich Kittler
# Portion of povasm.asm, ported to Linux in single-precision float
# Compute_Axis_Transform has only been formally tested
# requires ray.s
FL=4 # size in bytes of one matrix/vector element; 8 for double
.version "1.30"
# 05.02.11
.equ PII,1 # while this symbol is defined, MTransNormalize assembles its fucomip variant of the Epsilon test
.equ ONE,0x3F800000 # IEEE-754 single-precision bit pattern of 1.0, stored with integer moves; change for double
.equ NEGATIVE,0x80 # added to the most significant byte of a float to flip its sign bit
.data
.extern Epsilon # float threshold read by InvertMatrix and MTransNormalize
mat00: .fill 4,FL # mat00..mat03 form one 16-element scratch matrix; MTimes builds its product here
mat01: .fill 4,FL
mat02: .fill 4,FL
mat03: .fill 4,FL
mat10: .fill 4,FL # mat10..mat13 form a second 16-element scratch matrix used by Compute_Rotation_Transform
mat11: .fill 4,FL
mat12: .fill 4,FL
mat13: .fill 4,FL
degrees:.float 180.0 # Compute_Rotation_Transform divides pi by this to convert degrees to radians
.text
# Exported routines with their C-style prototypes. The prototypes do not show the
# calling convention: some routines below read their arguments from the stack,
# others expect them in eax/edx/ecx, and MIdentity pops its own argument.
.extern vcross #void f(VCT3 *d,*s1,*s2)
.globl MTimes #void f(MatrixT *d,*s1,*s2)
.globl InvertMatrix #int f(VCT3 out[3],in[3])
.globl MIdentity #void f(MatrixT *s)
.globl MTranspose #void f(MatrixT *d,MatrixT *s)
.globl MITranspose #void f(MatrixT *s)
.globl MTransPoint #void f(VCT3 *d,*s,TransformT *t)
.globl MInvTransPoint #void f(VCT3 *d,*s,TransformT *t)
.globl MTransDirection #void f(VCT3 *d,*s,TransformT *t)
.globl MInvTransDirection #void f(VCT3 *d,*s,TransformT *t)
.globl MTransNormal #void f(VCT3 *d,*s,TransformT *t)
.globl MTransNormalize #void f(VCT3 *d,*s,TransformT *t)
.globl MInvTransNormal #void f(VCT3 *d,*s,TransformT *t) - NOTE(review): no definition of this symbol is visible in this file
.globl Create_Transform #void f(TransformT *t)
.globl Compute_Scaling_Transform #void f(TransformT *d,VCT3 *s)
.globl Compute_Translation_Transform #void f(TransformT *d,VCT3 *s)
.globl Compute_Rotation_Transform #void f(TransformT *d,VCT3 *s)
.globl Compute_Axis_Transform #void f(TransformT *d,VCT3 *s,float w)
.globl Compose_Transforms #void f(MatrixT *Original,*New)
.align 8
# MTimes - 4x4 matrix product: d[r][c] = sum over k of s1[r][k] * s2[k][c].
# Arguments are read from the stack; at entry 4(%esp)=d, 8(%esp)=s1, 12(%esp)=s2.
# The product is built in the static scratch matrix mat00 and copied to d only
# after every input has been read, so d may be the same matrix as s1 or s2.
# Because the scratch matrix is shared, the routine is not reentrant.
# esi, edi and ebx are saved and restored, ecx is clobbered, eax and edx are
# never touched (Compose_Transforms and Compute_Rotation_Transform rely on that).
# The closing rep movsl copies upwards only if the direction flag is clear.
MTimes: pushl %esi # clobbers ecx; eax+edx free
pushl %edi
pushl %ebx
movl (12+12)(%esp),%esi # esi = s2 (third argument, above the three saved registers)
xorl %ecx,%ecx
movl (8+12)(%esp),%ebx # ebx = s1
movl $mat00,%edi # edi = current row of the scratch result
movb $4,%cl # cl = result rows still to compute
mat2: flds (%esi) # push row 0 of s2
flds 1*FL(%esi)
flds 2*FL(%esi)
flds 3*FL(%esi)
flds (%ebx) # first element of the current s1 row
fmul %st,%st(4) # scale the four s2 values: they become the four column sums
movb $3,%ch # ch = terms still to add to each sum
fmul %st,%st(3)
fmul %st,%st(2)
fmulp %st,%st(1)
.align 4
mat1: addl $FL,%ebx # next element of the s1 row
addl $(4*FL),%esi # next row of s2
flds (%ebx)
fld %st
fmuls (%esi)
faddp %st,%st(5) # add to the column 0 sum
fld %st
fmuls 1*FL(%esi)
faddp %st,%st(4) # add to the column 1 sum
fld %st
fmuls 2*FL(%esi)
faddp %st,%st(3) # add to the column 2 sum
decb %ch # sets ZF for the jnz below; the x87 instructions in between do not write EFLAGS
fmuls 3*FL(%esi)
faddp %st,%st(1) # add to the column 3 sum
jnz mat1
fstps 3*FL(%edi) # store the finished row, column 3 first
fstps 2*FL(%edi)
subl $(12*FL),%esi # rewind esi to row 0 of s2
fstps 1*FL(%edi)
addl $FL,%ebx # ebx = first element of the next s1 row
fstps (%edi)
addl $(4*FL),%edi # next scratch row
loop mat2 # ch is 0 here, so ecx equals the row counter in cl
movl $mat00,%esi
movb $(4*FL),%cl # ecx is 0 after the loop; this sets the dword count for the copy
popl %ebx
movl (4+8)(%esp),%edi# result pointer d (two saved registers still on the stack)
rep
movsl # copy the scratch matrix to d
popl %edi
popl %esi
ret
.align 8
# InvertMatrix - int f(VCT3 out[3], VCT3 in[3]); both pointers are read from the stack.
# Step 1: three vcross calls fill out[0], out[1], out[2]. vcross receives its
#         pointers in eax (destination), edx and ecx; presumably out[0] = in[1] x in[2],
#         out[1] = in[2] x in[0], out[2] = in[0] x in[1] - TODO confirm operand order in ray.s.
#         The code assumes vcross preserves eax, edx and ecx and leaves the x87 stack empty.
# Step 2: the determinant of the nine floats at in is computed on the x87 stack
#         (all eight registers are in use at the deepest point).
# Step 3: if |det| >= Epsilon, all nine floats of out are multiplied by 1/det and
#         1 is returned; otherwise 0 is returned and out is left unscaled.
# NOTE: GNU as assembles fsub/fsubr/fdiv/fdivr (and their popping forms) reversed
# when the destination is %st(i); the comments describe the behaviour that results.
InvertMatrix:
movl 8(%esp),%ecx # in
movl 4(%esp),%eax # out
addl $(6*FL),%ecx # in[2]
movl %ecx,%edx
subl $(3*FL),%edx # in[1]
call vcross
addl $(3*FL),%edx # in[2]
addl $(3*FL),%eax # out[1]
subl $(6*FL),%ecx # in[0]
call vcross
subl $(6*FL),%edx # in[0]
addl $(3*FL),%ecx # in[1]
addl $(3*FL),%eax # out[2]
call vcross
flds (%edx) # in[0]x
fld %st
flds 4*FL(%edx) # in[1]y
fmul %st,%st(2)
flds 8*FL(%edx) # in[2]z
fmul %st,%st(3) # deepest entry = in[0]x*in[1]y*in[2]z
flds 1*FL(%edx) # in[0]y
fmul %st,%st(1)
flds 3*FL(%edx) # in[1]x
fmul %st,%st(2) # in[0]y*in[1]x*in[2]z (subtracted later)
flds 2*FL(%edx) # in[0]z
fmul %st,%st(1)
flds 7*FL(%edx) # in[2]y
fmul %st,%st(2) # in[0]z*in[1]x*in[2]y
fxch %st(2)
faddp %st,%st(7) # add it to the first positive product
fmulp %st,%st(4) # in[1]y*in[0]z
fmulp %st,%st(4) # in[0]x*in[2]y
flds 5*FL(%edx) # in[1]z
fmul %st,%st(1) # in[0]y*in[1]z
flds 6*FL(%edx) # in[2]x
fmul %st,%st(2) # in[0]y*in[1]z*in[2]x
fmulp %st,%st(4) # in[0]z*in[1]y*in[2]x (subtracted later)
fmulp %st,%st(4) # in[0]x*in[1]z*in[2]y (subtracted later)
faddp %st,%st(4) # sum of the three positive products
fsubrp %st,%st(3) # as assembled: st(3) = st(3) - st, pop
fsubrp %st,%st(2)
fsubrp %st,%st(1) # st = determinant
fst %st(1) # keep a copy of det underneath
fabs
fcomps Epsilon # compare |det| with Epsilon and pop; det remains on the stack
fnstsw
sahf
jae bgeps # |det| >= Epsilon: matrix is invertible
fstp %st # discard det
xorl %eax,%eax # return 0: singular
ret
nop
bgeps: fld1
fdivp %st,%st(1) # as assembled: st(1) = st / st(1) = 1/det, pop
movl 4(%esp),%ecx # out
movl $8,%edx # index of the last of the nine floats
movl $1,%eax # return value: success
dschl: flds (%ecx,%edx,4) # for double: scale 8 instead of 4
fmul %st(1),%st
fstps (%ecx,%edx,4) # ditto
decl %edx
jns dschl # loop while the index is still >= 0
fstp %st # discard 1/det
ret
.align 8 # change for double
# MIdentity - overwrite the 16-element matrix passed on the stack with the identity.
# The routine removes its own argument (ret $4): callers push the pointer and
# must not adjust esp afterwards.
# eax and ecx are clobbered (eax = ONE, ecx = 0 on return); edx and ebx are not
# touched; edi is saved and restored. rep stosl assumes a clear direction flag.
MIdentity: # ebx taboo, ecx+eax clobbered
pushl %edi
movl 8(%esp),%edi # matrix pointer: above saved edi and the return address
xorl %eax,%eax
movl $(4*FL),%ecx # 16 dwords
rep
stosl # clear every element; edi ends one element past the matrix
movl $ONE,%eax
movl %eax,(-16*FL)(%edi) # m[0][0]
movl %eax,(-11*FL)(%edi) # m[1][1]
movl %eax,(-6*FL)(%edi) # m[2][2]
movl %eax,(-1*FL)(%edi) # m[3][3]
popl %edi
ret $4
.align 8
# Create_Transform - void f(TransformT *t), argument on the stack.
# Sets both halves of the transform to the identity: the matrix at t and the
# inverse matrix 16 elements further on. MIdentity pops its own argument, so
# both arguments are queued here and no stack cleanup follows the calls.
# On return edx = t->inverse; eax and ecx are clobbered by MIdentity.
Create_Transform:
movl 4(%esp),%eax # eax = t (MIdentity overwrites eax anyway)
leal (16*FL)(%eax),%edx # edx = t->inverse
pushl %edx # argument of the second call
pushl %eax # argument of the first call
call MIdentity # matrix
call MIdentity # inverse
ret
.align 8
# MTranspose - void f(MatrixT *d, MatrixT *s), arguments on the stack.
# Writes the transpose of the 4x4 matrix s to d: d[c][r] = s[r][c].
# d and s must be different matrices (s is read while d is being written).
# esi and edi are saved and restored; eax and ecx are both 0 on return;
# edx is not touched. movsl assumes a clear direction flag.
# With FL=8 (double) every element would need a second movsl.
MTranspose:
pushl %edi
pushl %esi
movl 16(%esp),%esi # s, read sequentially
movl 12(%esp),%edi # d, written column by column
movl $4,%ecx # source rows left
.Lmtranspose_row:
.rept 3
movsl # d[c][r] = s[r][c]
addl $(3*FL),%edi # same column of d, one row further down
.endr
movsl # last element of the source row
subl $(12*FL),%edi # back to the top of d, one column to the right
decl %ecx
jnz .Lmtranspose_row
xorl %eax,%eax # callers are told that eax and ecx come back as zero
popl %esi
popl %edi
ret
# MIT_SWAP a, b - exchange matrix elements number a and b (dword each) of the
# matrix at (%edx); eax is the only scratch register.
.macro MIT_SWAP a, b
movl (\a*FL)(%edx),%eax
xchgl %eax,(\b*FL)(%edx)
movl %eax,(\a*FL)(%edx)
.endm
.align 8 # change for double
# MITranspose - void f(MatrixT *s), argument on the stack.
# Transposes the 4x4 matrix in place by exchanging the six pairs of elements
# that mirror each other across the diagonal. Clobbers eax and edx only.
MITranspose:
movl 4(%esp),%edx # matrix
MIT_SWAP 4, 1 # [1][0] <-> [0][1]
MIT_SWAP 8, 2 # [2][0] <-> [0][2]
MIT_SWAP 12, 3 # [3][0] <-> [0][3]
MIT_SWAP 9, 6 # [2][1] <-> [1][2]
MIT_SWAP 13, 7 # [3][1] <-> [1][3]
MIT_SWAP 14, 11 # [3][2] <-> [2][3]
ret
.align 8
# MTransPoint - transform the point s by the matrix at t, including translation:
#   d.x = s.x*m[0] + s.y*m[4] + s.z*m[8]  + m[12]
#   d.y = s.x*m[1] + s.y*m[5] + s.z*m[9]  + m[13]
#   d.z = s.x*m[2] + s.y*m[6] + s.z*m[10] + m[14]
# (m[i] = element i of the 16 floats at t).
# Register arguments: eax = d, edx = s, ecx = t. The stack loads are commented
# out, so nothing is read from the stack. s is read completely before d is
# written, so d may be the same vector as s. The x87 stack is empty on return.
MTransPoint:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*m[0]: start of the x sum
flds 1*FL(%ecx)
fmul %st(4),%st # s.x*m[1]: start of the y sum
flds 2*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*m[2]
flds 4*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*m[4]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*m[5]
flds 6*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*m[6]
flds 8*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*m[8]
flds 9*FL(%ecx)
fmul %st(3),%st
# movl 4(%esp),%eax # d
faddp %st,%st(1) # y sum += s.z*m[9]
flds 10*FL(%ecx)
fmulp %st,%st(3) # the s.z slot becomes s.z*m[10]
fadds 13*FL(%ecx) # y translation
fstps 1*FL(%eax) # d.y
fadds 12*FL(%ecx) # x translation
fstps (%eax) # d.x
faddp %st,%st(1) # s.z*m[10] + s.y*m[6]
fadds 14*FL(%ecx) # z translation
faddp %st,%st(1) # + s.x*m[2]
fstps 2*FL(%eax) # d.z
ret
.align 8
# MInvTransPoint - transform the point s by the inverse matrix of t (the 16
# floats that start 16*FL bytes after t), including translation:
#   d.x = s.x*i[0] + s.y*i[4] + s.z*i[8]  + i[12]
#   d.y = s.x*i[1] + s.y*i[5] + s.z*i[9]  + i[13]
#   d.z = s.x*i[2] + s.y*i[6] + s.z*i[10] + i[14]
# Register arguments: eax = d, edx = s, ecx = t (the stack loads are commented out).
# ecx is advanced to the inverse matrix and not restored. s is read completely
# before d is written. The x87 stack is empty on return.
MInvTransPoint:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
addl $(16*FL),%ecx # t->inverse
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*i[0]: start of the x sum
flds 1*FL(%ecx)
fmul %st(4),%st # s.x*i[1]: start of the y sum
flds 2*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*i[2]
flds 4*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*i[4]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*i[5]
flds 6*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*i[6]
flds 8*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*i[8]
flds 9*FL(%ecx)
fmul %st(3),%st
# movl 4(%esp),%eax # d
faddp %st,%st(1) # y sum += s.z*i[9]
fadds 13*FL(%ecx) # y translation
fstps 1*FL(%eax) # d.y
fadds 12*FL(%ecx) # x translation
fstps (%eax) # d.x
fmuls 10*FL(%ecx) # s.z*i[10]
faddp %st,%st(1) # + s.y*i[6]
fadds 14*FL(%ecx) # z translation
faddp %st,%st(1) # + s.x*i[2]
fstps 2*FL(%eax) # d.z
ret
.align 8
# MTransDirection - transform the direction s by the matrix at t; the
# translation elements m[12..14] are not added:
#   d.x = s.x*m[0] + s.y*m[4] + s.z*m[8]
#   d.y = s.x*m[1] + s.y*m[5] + s.z*m[9]
#   d.z = s.x*m[2] + s.y*m[6] + s.z*m[10]
# Register arguments: eax = d, edx = s, ecx = t (the stack loads are commented out).
# s is read completely before d is written. The x87 stack is empty on return.
MTransDirection:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*m[0]: start of the x sum
flds 1*FL(%ecx)
fmul %st(4),%st # s.x*m[1]: start of the y sum
flds 2*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*m[2]
flds 4*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*m[4]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*m[5]
flds 6*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*m[6]
flds 8*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*m[8]
flds 9*FL(%ecx)
fmul %st(3),%st
# movl 4(%esp),%eax # d
faddp %st,%st(1) # y sum += s.z*m[9]
flds 10*FL(%ecx)
fmulp %st,%st(3) # the s.z slot becomes s.z*m[10]
fstps 1*FL(%eax) # d.y
fstps (%eax) # d.x
faddp %st,%st(1) # s.z*m[10] + s.y*m[6]
faddp %st,%st(1) # + s.x*m[2]
fstps 2*FL(%eax) # d.z
ret
.align 8
# MInvTransDirection - transform the direction s by the inverse matrix of t
# (the 16 floats that start 16*FL bytes after t); no translation is added:
#   d.x = s.x*i[0] + s.y*i[4] + s.z*i[8]
#   d.y = s.x*i[1] + s.y*i[5] + s.z*i[9]
#   d.z = s.x*i[2] + s.y*i[6] + s.z*i[10]
# Register arguments: eax = d, edx = s, ecx = t (the stack loads are commented out).
# ecx is advanced to the inverse matrix and not restored. s is read completely
# before d is written. The x87 stack is empty on return.
MInvTransDirection:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
leal 16*FL(%ecx),%ecx# t->inverse
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*i[0]: start of the x sum
flds 1*FL(%ecx)
fmul %st(4),%st # s.x*i[1]: start of the y sum
flds 2*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*i[2]
flds 4*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*i[4]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*i[5]
flds 6*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*i[6]
flds 8*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*i[8]
flds 9*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(1) # y sum += s.z*i[9]
flds 10*FL(%ecx)
fmulp %st,%st(3) # the s.z slot becomes s.z*i[10]
# movl 4(%esp),%eax # d
fstps 1*FL(%eax) # d.y
fstps (%eax) # d.x
faddp %st,%st(1) # s.z*i[10] + s.y*i[6]
faddp %st,%st(1) # + s.x*i[2]
fstps 2*FL(%eax) # d.z
ret
.align 8
# MTransNormal - transform the normal s by the transposed inverse matrix of t.
# The inverse (16 floats starting 16*FL bytes after t) is read along its rows:
#   d.x = s.x*i[0] + s.y*i[1] + s.z*i[2]
#   d.y = s.x*i[4] + s.y*i[5] + s.z*i[6]
#   d.z = s.x*i[8] + s.y*i[9] + s.z*i[10]
# The result is not normalized. Register arguments: eax = d, edx = s, ecx = t
# (the stack loads are commented out). ecx is advanced to the inverse matrix
# and not restored. s is read completely before d is written.
MTransNormal:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
addl $(16*FL),%ecx # t->inverse
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*i[0]: start of the x sum
flds 4*FL(%ecx)
fmul %st(4),%st # s.x*i[4]: start of the y sum
flds 8*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*i[8]
flds 1*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*i[1]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*i[5]
flds 9*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*i[9]
flds 2*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*i[2]
flds 6*FL(%ecx)
fmul %st(3),%st
# movl 4(%esp),%eax # d
faddp %st,%st(1) # y sum += s.z*i[6]
flds 10*FL(%ecx)
fmulp %st,%st(3) # the s.z slot becomes s.z*i[10]
fstps 1*FL(%eax) # d.y
fstps (%eax) # d.x
faddp %st,%st(1) # s.z*i[10] + s.y*i[9]
faddp %st,%st(1) # + s.x*i[8]
fstps 2*FL(%eax) # d.z
ret
.align 8
# MTransNormalize - like MTransNormal, followed by normalization of the result.
# v = s multiplied by the transposed inverse matrix of t:
#   v.x = s.x*i[0] + s.y*i[1] + s.z*i[2]
#   v.y = s.x*i[4] + s.y*i[5] + s.z*i[6]
#   v.z = s.x*i[8] + s.y*i[9] + s.z*i[10]
# len = sqrt(v.x^2 + v.y^2 + v.z^2). If the length passes the Epsilon test,
# d = v / len; otherwise (label tfehl) d is set to (1, 0, 0).
# Register arguments: eax = d, edx = s, ecx = t. ecx is first advanced to the
# inverse matrix and later overwritten with d. The x87 stack is empty on return.
# NOTE: GNU as assembles fdivp with an %st(i) destination reversed; the comment
# at the division describes the behaviour that results.
MTransNormalize:
# movl 8(%esp),%edx # s
# movl 12(%esp),%ecx # t
flds (%edx) # s.x
addl $(16*FL),%ecx # t->inverse
flds 1*FL(%edx) # s.y
flds 2*FL(%edx) # s.z
flds (%ecx)
fmul %st(3),%st # s.x*i[0]: start of the x sum
flds 4*FL(%ecx)
fmul %st(4),%st # s.x*i[4]: start of the y sum
flds 8*FL(%ecx)
fmulp %st,%st(5) # the s.x slot becomes s.x*i[8]
flds 1*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(2) # x sum += s.y*i[1]
flds 5*FL(%ecx)
fmul %st(4),%st
faddp %st,%st(1) # y sum += s.y*i[5]
flds 9*FL(%ecx)
fmulp %st,%st(4) # the s.y slot becomes s.y*i[9]
flds 2*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(2) # x sum += s.z*i[2]
flds 6*FL(%ecx)
fmul %st(3),%st
faddp %st,%st(1) # y sum += s.z*i[6]
flds 10*FL(%ecx)
fmulp %st,%st(3) # the s.z slot becomes s.z*i[10]
fld %st # stack here: v.y, v.x, then the three partial products of v.z
fmul %st,%st # v.y^2
fxch %st(5) # park v.y^2 at the bottom, fetch s.x*i[8]
faddp %st,%st(3)
fxch %st(3)
faddp %st,%st(2) # v.z is complete
fld %st
fmul %st,%st # v.x^2
faddp %st,%st(4) # v.x^2 + v.y^2
fld %st(1)
fmul %st,%st # v.z^2
fadd %st(4),%st
fsqrt # len
# movl 4(%esp),%ecx # d
movl %eax,%ecx # for regparm only
.ifndef PII
fcoms Epsilon
fnstsw
sahf
fld1
jbe tfehl
nop
.else
flds Epsilon
fucomip %st(1),%st # compare Epsilon with len, setting EFLAGS directly, and pop Epsilon
fld1
ja tfehl # Epsilon > len: too short to normalize
.endif
fdivp %st,%st(1) # as assembled: st(1) = st / st(1) = 1/len, pop
fmul %st,%st(3) # scale v.y
fmul %st,%st(2) # scale v.z
fmulp %st,%st(1) # scale v.x and drop 1/len
fstps (%ecx) # d.x
fstps 2*FL(%ecx) # d.z
fstps 1*FL(%ecx) # d.y
fstp %st # discard v.x^2 + v.y^2
ret
tfehl: fstps (%ecx) # d.x = 1.0 (the constant just loaded)
fucompp # used only to pop two entries
fucompp # and two more
fstp %st # the x87 stack is now empty
fldz
fsts 1*FL(%ecx) # d.y = 0
fstps 2*FL(%ecx) # d.z = 0
ret
.align 8
# Compute_Scaling_Transform - void f(TransformT *d, VCT3 *s), arguments on the stack.
# Clears the matrix and the inverse matrix of d (32 elements) and then sets
#   matrix diagonal  = (s.x,   s.y,   s.z,   1)
#   inverse diagonal = (1/s.x, 1/s.y, 1/s.z, 1)
# There is no check for a zero component: the reciprocal is whatever the FPU
# produces for a division by zero.
# edi is saved and restored; eax, ecx and edx are clobbered.
# rep stosl assumes a clear direction flag. The x87 stack is empty on return.
Compute_Scaling_Transform:
pushl %edi
movl 8(%esp),%edi # d
movl $(8*FL),%ecx # dword count of matrix + inverse
xorl %eax,%eax
rep
stosl # clear both matrices
movl 8(%esp),%edi # back to the start of d
movl 12(%esp),%edx # s
movl $ONE,%eax
movl %eax,15*FL(%edi) # matrix[3][3] = 1
movl %eax,31*FL(%edi) # inverse[3][3] = 1
flds (%edx)
fstps (%edi) # matrix[0][0] = s.x
fld1
fdivs (%edx)
fstps 16*FL(%edi) # inverse[0][0] = 1/s.x
flds 1*FL(%edx)
fstps 5*FL(%edi) # matrix[1][1] = s.y
fld1
fdivs 1*FL(%edx)
fstps 21*FL(%edi) # inverse[1][1] = 1/s.y
flds 2*FL(%edx)
fstps 10*FL(%edi) # matrix[2][2] = s.z
fld1
fdivs 2*FL(%edx)
fstps 26*FL(%edi) # inverse[2][2] = 1/s.z
popl %edi
ret
.align 8
# Compute_Translation_Transform - void f(TransformT *d, VCT3 *s), arguments on the stack.
# Builds a pure translation:
#   matrix  = identity with row 3 = ( s.x,  s.y,  s.z, 1)
#   inverse = identity with row 3 = (-s.x, -s.y, -s.z, 1)
# The vector is copied as raw dwords; negation is done by flipping the sign
# bit in the top byte of each copied float.
# esi and edi are saved and restored; eax and ecx are clobbered; edx is not
# touched. rep stosl assumes a clear direction flag.
Compute_Translation_Transform:
pushl %esi
pushl %edi
movl 12(%esp),%edi # d
xorl %eax,%eax
movl $(8*FL),%ecx # dword count of matrix + inverse
rep
stosl # clear both matrices
movl 12(%esp),%edi # back to the start of d
movl 16(%esp),%esi # s
movl $ONE,%eax
movl %eax,(%edi) # matrix diagonal
movl %eax,5*FL(%edi)
movl %eax,10*FL(%edi)
movl %eax,15*FL(%edi)
movl %eax,16*FL(%edi) # inverse diagonal
movl %eax,21*FL(%edi)
movl %eax,26*FL(%edi)
movl %eax,31*FL(%edi)
movl (%esi),%eax # matrix[3][0..2] = s
movl %eax,12*FL(%edi)
movl 1*FL(%esi),%eax
movl %eax,13*FL(%edi)
movl 2*FL(%esi),%eax
movl %eax,14*FL(%edi)
movl (%esi),%eax # inverse[3][0..2] = s, negated below
movl %eax,28*FL(%edi)
movl 1*FL(%esi),%eax
movl %eax,29*FL(%edi)
movl 2*FL(%esi),%eax
movl %eax,30*FL(%edi)
xorb $NEGATIVE,(29*FL-1)(%edi) # flip the sign of inverse[3][0]
xorb $NEGATIVE,(30*FL-1)(%edi) # inverse[3][1]
xorb $NEGATIVE,(31*FL-1)(%edi) # inverse[3][2]
popl %edi
popl %esi
ret
.align 8
# Compute_Rotation_Transform - void f(TransformT *d, VCT3 *s), arguments on the stack.
# s holds three rotation angles in degrees (each is multiplied by pi/180).
# Steps:
#   1. matrix = rotation about x by s.x; inverse = transpose of matrix
#   2. mat10 = rotation about y by s.y; matrix = matrix * mat10;
#      mat10 is transposed in place; inverse = mat10 * inverse
#   3. the same with a rotation about z by s.z
# The y and z angles stay on the x87 stack across the calls in between, so the
# called routines must leave the x87 stack as they found it.
# Uses the static scratch matrix mat10 (and mat00 through MTimes), so the
# routine is not reentrant. ebx is saved and restored.
# MIdentity pops its own argument; MTranspose, MTimes and MITranspose do not,
# hence the addl ...,%esp after those calls only.
Compute_Rotation_Transform:
pushl %ebx
fldpi
fdivs degrees # pi/180
movl 12(%esp),%ebx # v
flds 1*FL(%ebx)
fmul %st(1),%st # y angle in radians
flds (%ebx)
fmul %st(2),%st # x angle in radians
flds 2*FL(%ebx)
fmulp %st,%st(3) # z angle in radians replaces the factor; stack: x, y, z
movl 8(%esp),%ebx # transform
pushl %ebx
call MIdentity
fsincos # st = cos(x), st(1) = sin(x)
fsts 5*FL(%ebx) # m11
fstps 10*FL(%ebx) # m22
fsts 6*FL(%ebx) # m12
fchs
fstps 9*FL(%ebx) # m21
push %ebx # source: t->matrix
addl $(16*FL),%ebx # t->inverse
pushl %ebx # destination
call MTranspose # zeroes eax+ecx
addl $8,%esp
movl $mat10,%eax
pushl %eax
call MIdentity
movl $mat10,%ebx
fsincos # st = cos(y), st(1) = sin(y)
fsts (%ebx) # m00
fstps 10*FL(%ebx) # m22
fsts 8*FL(%ebx) # m20
fchs
fstps 2*FL(%ebx) # m02
pushl %ebx # s2 = mat10
movl (8+4)(%esp),%ebx # ebx = d (one extra dword is on the stack)
push %ebx # s1 = t->matrix
push %ebx # destination = t->matrix
call MTimes # matrix = matrix * mat10
addl $12,%esp
movl $mat10,%eax
pushl %eax
call MITranspose # mat10 = transpose(mat10)
addl $4,%esp
movl (4+4)(%esp),%ebx # d
addl $(16*FL),%ebx # t->inverse
pushl %ebx # s2 = t->inverse
movl $mat10,%eax
pushl %eax # s1 = mat10
pushl %ebx # destination = t->inverse
call MTimes # inverse = mat10 * inverse
addl $12,%esp
pushl %eax # eax still holds $mat10: MTimes does not touch eax
call MIdentity
fsincos # st = cos(z), st(1) = sin(z)
movl $mat10,%ebx
fsts (%ebx) # m00
fstps 5*FL(%ebx) # m11
fsts 1*FL(%ebx) # m01
fchs
fstps 4*FL(%ebx) # m10
pushl %ebx # s2 = mat10
movl (8+4)(%esp),%ebx # d
pushl %ebx # s1 = t->matrix
pushl %ebx # destination = t->matrix
call MTimes # matrix = matrix * mat10
addl $12,%esp
movl $mat10,%eax
pushl %eax
call MITranspose # mat10 = transpose(mat10)
addl $4,%esp
movl (4+4)(%esp),%ebx # d
addl $(16*FL),%ebx # t->inverse
pushl %ebx # s2 = t->inverse
movl $mat10,%eax
pushl %eax # s1 = mat10
pushl %ebx # destination = t->inverse
call MTimes # inverse = mat10 * inverse
addl $12,%esp
popl %ebx
ret
.align 8
# Compute_Axis_Transform - void f(TransformT *d, VCT3 *s, float w), arguments on the stack.
# Builds the rotation by angle w about the axis s. w is handed to fsincos
# unchanged, i.e. it is taken in radians. The axis is normalized first; there
# is no check for a zero-length axis (1/length is computed unconditionally).
# With (x, y, z) the normalized axis, c = cos(w) and s = sin(w), the matrix is
#   m[0] = x*x+(1-x*x)*c   m[1] = x*y*(1-c)+z*s   m[2]  = x*z*(1-c)-y*s   m[3]  = 0
#   m[4] = x*y*(1-c)-z*s   m[5] = y*y+(1-y*y)*c   m[6]  = y*z*(1-c)+x*s   m[7]  = 0
#   m[8] = x*z*(1-c)+y*s   m[9] = y*z*(1-c)-x*s   m[10] = z*z+(1-z*z)*c   m[11] = 0
#   m[12] = m[13] = m[14] = 0, m[15] = 1
# and the inverse matrix is its transpose (written by MTranspose).
# Between the stores the x87 stack holds z, y, x, c, s (top first).
# edi is saved and restored; eax, ecx and edx are clobbered.
# NOTE: GNU as assembles fsub/fsubr/fdiv (and popping forms) reversed when the
# destination is %st(i); the formulas above describe the behaviour that results.
Compute_Axis_Transform:
flds (12)(%esp) # angle
fsincos # st = cos, st(1) = sin
movl (8)(%esp),%edx # VCT3
flds (%edx)
flds 1*FL(%edx)
flds 2*FL(%edx)
fld %st
fmul %st,%st # z*z
fld %st(2)
fmul %st,%st # y*y
faddp %st,%st(1)
fld %st(3)
fmul %st,%st # x*x
faddp %st,%st(1)
fsqrt # length of the axis
fld1
fdivp %st,%st(1) # as assembled: 1/length
movl (4)(%esp),%edx # t->matrix
xorl %eax,%eax # eax = 0 for the integer stores of zero elements
fmul %st,%st(3)
fmul %st,%st(2)
fmulp %st,%st(1) # normalize
fld %st(2)
fmul %st,%st # x*x
fld1
fsub %st(1),%st # 1 - x*x
fmul %st(5),%st # * c
faddp %st,%st(1)
fstps (%edx) # m[0]
fld1
fsub %st(4),%st # 1 - c
fmul %st(2),%st # * y
fmul %st(3),%st # * x
fld %st(1)
fmul %st(6),%st # z*s
faddp %st,%st(1)
fstps 1*FL(%edx) # m[1]
fld1
fsub %st(4),%st # 1 - c
fmul %st(3),%st # * x
fmul %st(1),%st # * z
movl %eax,3*FL(%edx) # m[3] = 0
fld %st(2)
fmul %st(6),%st # y*s
fsubrp %st,%st(1) # as assembled: st(1) - st
fstps 2*FL(%edx) # m[2]
fld1
fsub %st(4),%st # 1 - c
fmul %st(3),%st # * x
fmul %st(2),%st # * y
fld %st(1)
fmul %st(6),%st # z*s
fsubrp %st,%st(1) # as assembled: st(1) - st
fstps 4*FL(%edx) # m[4]
fld %st(1)
fmul %st,%st # y*y
fld1
fsub %st(1),%st # 1 - y*y
fmul %st(5),%st # * c
faddp %st,%st(1)
fstps 5*FL(%edx) # m[5]
fld1
fsub %st(4),%st # 1 - c
fmul %st(2),%st # * y
movl %eax,7*FL(%edx) # m[7] = 0
fmul %st(1),%st # * z
fld %st(3)
fmul %st(6),%st # x*s
faddp %st,%st(1)
fstps 6*FL(%edx) # m[6]
fld1
fsub %st(4),%st # 1 - c, kept for m[9] as well
fld %st
fmul %st(4),%st # * x
fmul %st(2),%st # * z
fld %st(3) # y; all eight x87 registers are in use here
fmul %st(7),%st # y*s
faddp %st,%st(1)
fstps 8*FL(%edx) # m[8]
fmulp %st,%st(2) # y*(1-c)
fmul %st,%st(1) # y*z*(1-c)
fxch %st(2)
fmulp %st,%st(4) # x*s
fsubp %st,%st(3) # as assembled: st - st(3) = y*z*(1-c) - x*s
fmul %st,%st # z*z
fld1
fsub %st(1),%st # 1 - z*z
movl $FL,%ecx # count for the rep stosl below
fmulp %st,%st(2) # c*(1 - z*z)
faddp %st,%st(1)
fstps 10*FL(%edx) # m[10]
fstps 9*FL(%edx) # m[9]; the x87 stack is empty now
pushl %edi
leal 11*FL(%edx),%edi
fld1
rep
stosl # m[11..14] = 0 (eax is still 0)
fstps (%edi) # m[15] = 1
popl %edi
pushl %edx # t->matrix
leal 16*FL(%edx),%edx
pushl %edx # t->inverse
call MTranspose # zeros eax+ecx
addl $8,%esp
ret
.align 8
# Compose_Transforms - void f(Original, New), arguments on the stack. Each
# pointer addresses a matrix followed, 16 elements later, by its inverse.
#   Original.matrix  = Original.matrix * New.matrix
#   Original.inverse = New.inverse * Original.inverse
# Both products go through MTimes, which leaves eax and edx untouched; the
# pointers are therefore kept in those registers across the first call.
# The three argument slots of the first call are rewritten in place for the
# second call and released once at the end.
Compose_Transforms:
movl 8(%esp),%eax # New_Transform
movl 4(%esp),%edx # Original_Transform
pushl %eax # s2 = New
pushl %edx # s1 = Original
pushl %edx # destination = Original
call MTimes # edx and eax survive the call
addl $(16*FL),%edx # Original->inverse
addl $(16*FL),%eax # New->inverse
movl %edx,8(%esp) # s2 = Original->inverse
movl %eax,4(%esp) # s1 = New->inverse
movl %edx,(%esp) # destination = Original->inverse
call MTimes
addl $12,%esp
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment