# markcmarino/matrices.s

## matrices.s
# Part of povasm.asm, ported to Linux (single-precision float)
# Compute_Axis_Transform has only been formally tested
# requires ray.s

FL=4     # size in bytes of one matrix/vector element (4 = float; 8 for double)
.version "1.30"
# 05.02.11
.equ     PII,1			# when defined, MTransNormalize compares with fucomip instead of fcoms/fnstsw/sahf
.equ     ONE,0x3F800000		# bit pattern of single-precision 1.0; change for double
.equ     NEGATIVE,0x80		# sign-bit mask for the most significant byte of an element

.data
.extern Epsilon			# float threshold read by InvertMatrix and MTransNormalize
# mat00..mat03: four rows of four elements; scratch result buffer of MTimes
mat00:	.fill     4,FL
mat01:	.fill     4,FL
mat02:	.fill     4,FL
mat03:	.fill     4,FL
# mat10..mat13: four rows of four elements; scratch matrix of Compute_Rotation_Transform
mat10:	.fill     4,FL
mat11:	.fill     4,FL
mat12:	.fill     4,FL
mat13:	.fill     4,FL
degrees:.float    180.0		# divisor used to build the factor pi/180 in Compute_Rotation_Transform

.text
.extern vcross			#void f(VCT3 *d,*s1,*s2)
.globl	MTimes			#void f(MatrixT *d,*s1,*s2)
.globl	InvertMatrix		#int  f(VCT3 out[3],in[3])
.globl	MIdentity		#void f(MatrixT *s)
.globl	MTranspose		#void f(MatrixT *d,MatrixT *s)
.globl	MITranspose		#void f(MatrixT *s)
.globl	MTransPoint		#void f(VCT3 *d,*s,TransformT *t)
.globl	MInvTransPoint		#void f(VCT3 *d,*s,TransformT *t)
.globl	MTransDirection		#void f(VCT3 *d,*s,TransformT *t)
.globl	MInvTransDirection	#void f(VCT3 *d,*s,TransformT *t)
.globl	MTransNormal		#void f(VCT3 *d,*s,TransformT *t)
.globl	MTransNormalize		#void f(VCT3 *d,*s,TransformT *t)
.globl  MInvTransNormal		#void f(VCT3 *d,*s,TransformT *t)
.globl  Create_Transform	#void f(TransformT *t)
.globl	Compute_Scaling_Transform   #void f(TransformT *d,VCT3 *s)
.globl	Compute_Translation_Transform #void f(TransformT *d,VCT3 *s)
.globl  Compute_Rotation_Transform  #void f(TransformT *d,VCT3 *s)
.globl	Compute_Axis_Transform	#void f(TransformT *d,VCT3 *s,float w)
.globl	Compose_Transforms	#void f(MatrixT *Original,*New)

	.align 8
# MTimes -- 4x4 matrix product: *d = (*s1) x (*s2), row-major, elements of FL bytes.
# Stack arguments: 4(%esp) = d, 8(%esp) = s1, 12(%esp) = s2 (caller removes them).
# The product is built in the static buffer mat00 and copied to d at the end,
# so d may be the same matrix as s1 or s2 (callers in this file rely on that).
# Because of the static buffer the routine is not reentrant.
# Registers: esi, edi, ebx are saved and restored; eax and edx are never
# touched; ecx is clobbered. The copy uses rep movsl and does not set the
# direction flag itself -- presumably DF=0 on entry; confirm with callers.
MTimes:	pushl	%esi            # clobbers ecx; eax+edx free
	pushl	%edi
	pushl	%ebx
	movl	(12+12)(%esp),%esi	# esi = s2 (12 bytes of saved registers + argument offset)
	xorl	%ecx,%ecx
	movl	(8+12)(%esp),%ebx	# ebx = s1
	movl	$mat00,%edi		# edi = current row of the scratch result
	movb	$4,%cl			# cl = result rows still to compute
mat2:	flds	(%esi)			# load row 0 of s2 ...
	flds	1*FL(%esi)
	flds	2*FL(%esi)
	flds	3*FL(%esi)
	flds	(%ebx)			# ... and scale it by s1[row][0]
	fmul	%st,%st(4)
	movb	$3,%ch			# ch = remaining terms of the four dot products
	fmul	%st,%st(3)
	fmul	%st,%st(2)
	fmulp	%st,%st(1)
	.align  4
mat1:	addl	$FL,%ebx		# next element of the s1 row
	addl	$(4*FL),%esi		# next row of s2
	flds	(%ebx)
	fld	%st
	fmuls	(%esi)
	faddp	%st,%st(5)		# accumulate into result column 0
	fld	%st
	fmuls	1*FL(%esi)
	faddp	%st,%st(4)		# column 1
	fld	%st
	fmuls	2*FL(%esi)
	faddp	%st,%st(3)		# column 2
	decb	%ch			# sets ZF for the jnz below; x87 instructions leave EFLAGS alone
	fmuls	3*FL(%esi)
	faddp	%st,%st(1)		# column 3
	jnz	mat1
	fstps	3*FL(%edi)		# store the finished result row (top of stack is column 3)
	fstps	2*FL(%edi)
	subl	$(12*FL),%esi		# rewind esi to row 0 of s2
	fstps	1*FL(%edi)
	addl	$FL,%ebx		# ebx -> first element of the next s1 row
	fstps	(%edi)
	addl	$(4*FL),%edi		# next scratch row
	loop	mat2			# ecx = cl here because ch has counted down to 0
	movl	$mat00,%esi
	movb	$(4*FL),%cl		# ecx = number of dwords to copy (16 when FL=4)
	popl	%ebx
	movl	(4+8)(%esp),%edi# result
	rep
	movsl				# copy scratch buffer to d
	popl	%edi
	popl	%esi
	ret

	.align  8
# InvertMatrix -- invert a 3x3 matrix stored as three consecutive 3-element vectors.
# Stack arguments: 4(%esp) = out, 8(%esp) = in.
# Returns eax = 1 on success, eax = 0 when |determinant| < Epsilon (out then
# still holds the unscaled cross products).
# Method: out[0] = vcross(in[1],in[2]), out[1] = vcross(in[2],in[0]),
# out[2] = vcross(in[0],in[1]); then all nine elements are multiplied by 1/det.
# vcross is external; it is called here with eax = destination and edx, ecx =
# the two source vectors, and is assumed to preserve eax, ecx and edx
# -- TODO confirm against ray.s.
# The determinant evaluation temporarily uses all eight x87 registers, so the
# x87 stack must be empty on entry.
InvertMatrix:
	movl	8(%esp),%ecx	# in
	movl	4(%esp),%eax    # out
	addl	$(6*FL),%ecx    # in[2]
	movl	%ecx,%edx
	subl	$(3*FL),%edx    # in[1]
	call	vcross
	addl	$(3*FL),%edx	# in[2]
	addl	$(3*FL),%eax    # out[1]
	subl	$(6*FL),%ecx    # in[0]
	call	vcross
	subl	$(6*FL),%edx    # in[0]
	addl	$(3*FL),%ecx    # in[1]
	addl	$(3*FL),%eax    # out[2]
	call	vcross
 	flds	(%edx)		# in[0]x
	fld	%st
	flds	4*FL(%edx)	# in[1]y
	fmul	%st,%st(2)
	flds	8*FL(%edx)	# in[2]z
	fmul	%st,%st(3)
	flds	1*FL(%edx)	# in[0]y
	fmul	%st,%st(1)
	flds	3*FL(%edx)	# in[1]x
	fmul	%st,%st(2)
	flds	2*FL(%edx)	# in[0]z
	fmul	%st,%st(1)
	flds	7*FL(%edx)	# in[2]y
	fmul	%st,%st(2)
	fxch	%st(2)
	faddp	%st,%st(7)
	fmulp	%st,%st(4)
	fmulp	%st,%st(4)
	flds	5*FL(%edx)	# in[1]z
	fmul	%st,%st(1)
	flds	6*FL(%edx)	# in[2]x
	fmul	%st,%st(2)
	fmulp	%st,%st(4)
	fmulp	%st,%st(4)
	faddp	%st,%st(4)	# sum of the three positive products
	fsubrp	%st,%st(3)	# subtract the three negative products
	fsubrp	%st,%st(2)
	fsubrp	%st,%st(1)	# st(0) = determinant
	fst	%st(1)		# keep a copy of det below the top of stack
	fabs
	fcomps	Epsilon		# compare |det| with Epsilon and pop it
	fnstsw			# status word -> ax
	sahf
	jae	bgeps		# |det| >= Epsilon: matrix is invertible
	fstp	%st		# drop det
	xorl	%eax,%eax	# return 0
	ret
	nop
bgeps:	fld1
	fdivp	%st,%st(1)	# st(0) = 1/det (as assembled by GAS for this AT&T operand form)
	movl	4(%esp),%ecx	# out
	movl	$8,%edx		# index of the last of the nine elements
	movl	$1,%eax		# return 1
dschl:	flds	(%ecx,%edx,4)	# for double: scale 8 instead of 4
	fmul	%st(1),%st
	fstps	(%ecx,%edx,4)   # ditto
	decl	%edx
	jns	dschl		# loop while index >= 0
	fstp	%st		# drop 1/det
	ret

	.align	8		# change for double
# MIdentity -- overwrite the 4x4 matrix at *s with the identity matrix.
# Stack argument: 4(%esp) = s. The routine removes its own argument (ret $4).
# ebx and edx are left alone; on return eax = ONE and ecx = 0.
MIdentity:
	pushl	%edi
	movl	8(%esp),%edi		# s (4 bytes saved edi + return address)
	xorl	%eax,%eax
	movl	$(4*FL-1),%ecx		# 15 elements get zeroed, the 16th is set below
	rep
	stosl
	subl	$(15*FL),%edi		# back to element 0
	movl	$ONE,%eax
	movl	%eax,15*FL(%edi)	# the diagonal: elements 15, 10, 5, 0
	movl	%eax,10*FL(%edi)
	movl	%eax,5*FL(%edi)
	movl	%eax,0*FL(%edi)
	popl	%edi
	ret	$4

	.align  8
# Create_Transform -- set both matrices of *t (forward at offset 0, inverse at
# offset 16*FL) to identity.
# Stack argument: 4(%esp) = t. MIdentity pops each argument pushed here and
# does not modify edx.
Create_Transform:
	movl	4(%esp),%edx		# edx = t
	pushl	%edx
	call	MIdentity		# t->matrix
	leal	16*FL(%edx),%edx	# edx = t->inverse
	pushl	%edx
	call	MIdentity		# t->inverse
	ret

	.align	8
# MTranspose -- *d = transpose of *s for 4x4 matrices of 4-byte elements.
# Stack arguments: 4(%esp) = d, 8(%esp) = s. d and s must be distinct matrices.
# esi/edi are preserved; eax and ecx are both 0 on return.
# For double elements a second movsl per element would be needed.
MTranspose:
	pushl	%edi
	pushl	%esi
	movl	12(%esp),%edi		# d
	movl	16(%esp),%esi		# s
	movl	$4,%eax			# source rows left
.Lmtr_row:
	movl	$4,%ecx			# elements left in this source row
.Lmtr_elem:
	movsl				# d[c][r] = s[r][c]
	addl	$(3*FL),%edi		# together with movsl: one full row down in d
	decl	%ecx
	jnz	.Lmtr_elem
	subl	$(15*FL),%edi		# top of the next column of d
	decl	%eax
	jnz	.Lmtr_row
	popl	%esi
	popl	%edi
	ret

	.align	8		# change for double
# MITranspose -- transpose the 4x4 matrix at *s in place.
# Stack argument: 4(%esp) = s. Clobbers eax and edx.
# Written for 4-byte elements (change for double).
# MIT_SWAP exchanges element \lo with element \hi through eax; the macro is
# purged again after the routine.
.macro	MIT_SWAP lo, hi
	movl	\hi*FL(%edx),%eax
	xchgl	%eax,\lo*FL(%edx)
	movl	%eax,\hi*FL(%edx)
.endm
MITranspose:
	movl	4(%esp),%edx		# s
	MIT_SWAP 1, 4
	MIT_SWAP 2, 8
	MIT_SWAP 3, 12
	MIT_SWAP 6, 9
	MIT_SWAP 7, 13
	MIT_SWAP 11, 14
	ret
.purgem	MIT_SWAP

	.align	8
# MTransPoint -- transform a point with the forward matrix m at offset 0 of *t:
#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j] + m[3][j],   j = 0..2
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# s is loaded completely before d is written, so d may alias s.
# No integer register is modified; the x87 stack depth is unchanged on return.
MTransPoint:
#	movl	8(%esp),%edx	# s
#	movl	12(%esp),%ecx	# t
	flds	(%edx)		# s[0]
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# s[0]*m[0][0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st	# s[0]*m[0][1]
	flds	2*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes s[0]*m[0][2]
	flds	4*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + s[1]*m[1][0]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + s[1]*m[1][1]
	flds	6*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes s[1]*m[1][2]
	flds	8*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + s[2]*m[2][0]
	flds	9*FL(%ecx)
	fmul	%st(3),%st
#	movl	4(%esp),%eax	# d
	faddp	%st,%st(1)	# + s[2]*m[2][1]
	flds	10*FL(%ecx)
	fmulp	%st,%st(3)	# s[2] slot becomes s[2]*m[2][2]
	fadds	13*FL(%ecx)	# + translation m[3][1]
	fstps	1*FL(%eax)	# d[1]
	fadds	12*FL(%ecx)	# + translation m[3][0]
	fstps	(%eax)		# d[0]
	faddp	%st,%st(1)	# sum the three partial products of d[2]
	fadds	14*FL(%ecx)	# + translation m[3][2]
	faddp	%st,%st(1)
	fstps	2*FL(%eax)	# d[2]
	ret

	.align	8
# MInvTransPoint -- transform a point with the inverse matrix m at offset
# 16*FL of *t:
#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j] + m[3][j],   j = 0..2
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# ecx is advanced to t->inverse and EFLAGS are changed by that addl.
# s is loaded completely before d is written, so d may alias s.
MInvTransPoint:
#	movl	8(%esp),%edx	# s
#	movl	12(%esp),%ecx	# t
	flds	(%edx)		# s[0]
	addl	$(16*FL),%ecx	# t->inverse
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# s[0]*m[0][0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st	# s[0]*m[0][1]
	flds	2*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes s[0]*m[0][2]
	flds	4*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + s[1]*m[1][0]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + s[1]*m[1][1]
	flds	6*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes s[1]*m[1][2]
	flds	8*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + s[2]*m[2][0]
	flds	9*FL(%ecx)
	fmul	%st(3),%st
#	movl	4(%esp),%eax	# d
	faddp	%st,%st(1)	# + s[2]*m[2][1]
	fadds	13*FL(%ecx)	# + translation m[3][1]
	fstps	1*FL(%eax)	# d[1]
	fadds	12*FL(%ecx)	# + translation m[3][0]
	fstps	(%eax)		# d[0]
	fmuls	10*FL(%ecx)	# s[2]*m[2][2]
	faddp	%st,%st(1)
	fadds	14*FL(%ecx)	# + translation m[3][2]
	faddp	%st,%st(1)
	fstps	2*FL(%eax)	# d[2]
	ret

	.align	8
# MTransDirection -- transform a direction with the forward matrix m at offset
# 0 of *t (same as MTransPoint but without the translation row):
#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j],   j = 0..2
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# s is loaded completely before d is written, so d may alias s.
# No integer register is modified.
MTransDirection:
#	movl	8(%esp),%edx    # s
#	movl	12(%esp),%ecx   # t
	flds	(%edx)		# s[0]
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# s[0]*m[0][0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st	# s[0]*m[0][1]
	flds	2*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes s[0]*m[0][2]
	flds	4*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + s[1]*m[1][0]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + s[1]*m[1][1]
	flds	6*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes s[1]*m[1][2]
	flds	8*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + s[2]*m[2][0]
	flds	9*FL(%ecx)
	fmul	%st(3),%st
#	movl	4(%esp),%eax    # d
	faddp	%st,%st(1)	# + s[2]*m[2][1]
	flds	10*FL(%ecx)
	fmulp	%st,%st(3)	# s[2] slot becomes s[2]*m[2][2]
	fstps	1*FL(%eax)	# d[1]
	fstps	(%eax)		# d[0]
	faddp	%st,%st(1)	# sum the three partial products of d[2]
	faddp	%st,%st(1)
	fstps	2*FL(%eax)	# d[2]
	ret

	.align	8
# MInvTransDirection -- transform a direction with the inverse matrix m at
# offset 16*FL of *t (no translation):
#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j],   j = 0..2
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# ecx is advanced to t->inverse (with leal, so EFLAGS are not changed).
# s is loaded completely before d is written, so d may alias s.
MInvTransDirection:
#	movl	8(%esp),%edx	# s
#	movl	12(%esp),%ecx	# t
	flds	(%edx)		# s[0]
	leal	16*FL(%ecx),%ecx# t->inverse
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# s[0]*m[0][0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st	# s[0]*m[0][1]
	flds	2*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes s[0]*m[0][2]
	flds	4*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + s[1]*m[1][0]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + s[1]*m[1][1]
	flds	6*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes s[1]*m[1][2]
	flds	8*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + s[2]*m[2][0]
	flds	9*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(1)	# + s[2]*m[2][1]
	flds	10*FL(%ecx)
	fmulp	%st,%st(3)	# s[2] slot becomes s[2]*m[2][2]
#	movl	4(%esp),%eax	# d
	fstps	1*FL(%eax)	# d[1]
	fstps	(%eax)		# d[0]
	faddp	%st,%st(1)	# sum the three partial products of d[2]
	faddp	%st,%st(1)
	fstps	2*FL(%eax)	# d[2]
	ret

	.align	8
# MTransNormal -- transform a normal with the transpose of the inverse matrix
# m at offset 16*FL of *t:
#   d[i] = m[i][0]*s[0] + m[i][1]*s[1] + m[i][2]*s[2],   i = 0..2
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# ecx is advanced to t->inverse and EFLAGS are changed by that addl.
# s is loaded completely before d is written, so d may alias s.
MTransNormal:
#	movl	8(%esp),%edx	# s
#	movl	12(%esp),%ecx	# t
	flds	(%edx)		# s[0]
	addl	$(16*FL),%ecx   # t->inverse
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# m[0][0]*s[0]
	flds	4*FL(%ecx)
	fmul	%st(4),%st	# m[1][0]*s[0]
	flds	8*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes m[2][0]*s[0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + m[0][1]*s[1]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + m[1][1]*s[1]
	flds	9*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes m[2][1]*s[1]
	flds	2*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + m[0][2]*s[2]
	flds	6*FL(%ecx)
	fmul	%st(3),%st
#	movl	4(%esp),%eax	# d
	faddp	%st,%st(1)	# + m[1][2]*s[2]
	flds	10*FL(%ecx)
	fmulp	%st,%st(3)	# s[2] slot becomes m[2][2]*s[2]
	fstps	1*FL(%eax)	# d[1]
	fstps	(%eax)		# d[0]
	faddp	%st,%st(1)	# sum the three partial products of d[2]
	faddp	%st,%st(1)
	fstps	2*FL(%eax)	# d[2]
	ret

	.align	8
# MTransNormalize -- like MTransNormal (multiply s by the transpose of the
# inverse matrix at offset 16*FL of *t), then scale the result to unit length.
# If the length is below Epsilon the result is set to (1, 0, 0) instead.
# Register arguments (the stack loads are commented out):
#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
# ecx is overwritten (first t->inverse, later a copy of d). With PII undefined
# the fnstsw path also overwrites ax, which is why d is copied to ecx first.
# The x87 stack depth is unchanged on both exit paths.
MTransNormalize:
#	movl	8(%esp),%edx	# s
#	movl	12(%esp),%ecx	# t
	flds	(%edx)		# s[0]
	addl	$(16*FL),%ecx   # t->inverse
	flds	1*FL(%edx)	# s[1]
	flds	2*FL(%edx)	# s[2]
	flds	(%ecx)
	fmul	%st(3),%st	# m[0][0]*s[0]
	flds	4*FL(%ecx)
	fmul	%st(4),%st	# m[1][0]*s[0]
	flds	8*FL(%ecx)
	fmulp	%st,%st(5)	# s[0] slot becomes m[2][0]*s[0]
	flds	1*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(2)	# + m[0][1]*s[1]
	flds	5*FL(%ecx)
	fmul	%st(4),%st
	faddp	%st,%st(1)	# + m[1][1]*s[1]
	flds	9*FL(%ecx)
	fmulp	%st,%st(4)	# s[1] slot becomes m[2][1]*s[1]
	flds	2*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(2)	# + m[0][2]*s[2]
	flds	6*FL(%ecx)
	fmul	%st(3),%st
	faddp	%st,%st(1)	# + m[1][2]*s[2]
	flds	10*FL(%ecx)
	fmulp	%st,%st(3)	# s[2] slot becomes m[2][2]*s[2]
	fld	%st		# stack: ny, nx, three partial products of nz
	fmul	%st,%st		# ny^2
	fxch	%st(5)
	faddp	%st,%st(3)
	fxch	%st(3)
	faddp	%st,%st(2)	# nz complete
	fld	%st
	fmul	%st,%st		# nx^2
	faddp	%st,%st(4)	# nx^2 + ny^2
	fld	%st(1)
	fmul	%st,%st		# nz^2
	fadd	%st(4),%st
	fsqrt			# st(0) = length; below it: nx, nz, ny, nx^2+ny^2
#	movl	4(%esp),%ecx	 # d
	movl	%eax,%ecx        # for regparm only
	.ifndef PII
	fcoms	Epsilon
	fnstsw
	sahf
	fld1
	jbe	tfehl
	nop
	.else
	flds	Epsilon
	fucomip	%st(1),%st	# compare Epsilon with length, pop Epsilon
	fld1
	ja	tfehl		# Epsilon > length: vector too short to normalize
	.endif
	fdivp	%st,%st(1)	# st(0) = 1/length (as assembled by GAS for this AT&T operand form)
	fmul	%st,%st(3)	# ny /= length
	fmul	%st,%st(2)	# nz /= length
	fmulp	%st,%st(1)	# nx /= length
	fstps	(%ecx)		# d[0]
	fstps	2*FL(%ecx)	# d[2]
	fstps	1*FL(%ecx)	# d[1]
	fstp	%st		# drop nx^2+ny^2
	ret
tfehl:	fstps	(%ecx)		# d[0] = 1.0 (the value just loaded by fld1)
	fucompp			# discard the five remaining temporaries
	fucompp
	fstp	%st
	fldz
	fsts	1*FL(%ecx)	# d[1] = 0
	fstps	2*FL(%ecx)	# d[2] = 0
	ret

	.align	8
# Compute_Scaling_Transform -- build a scaling transform in *d from vector *s.
# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
# Forward matrix (offset 0):       diagonal s[0], s[1], s[2], 1.
# Inverse matrix (offset 16*FL):   diagonal 1/s[0], 1/s[1], 1/s[2], 1.
# All other elements are zeroed. No check is made for zero scale factors.
# Clobbers eax, ecx, edx; edi is preserved. rep stosl uses the current DF.
Compute_Scaling_Transform:
	pushl	%edi
	xorl	%eax,%eax
	movl	(4+4)(%esp),%edi# d
	movl	$(8*FL),%ecx	# 32 dwords = both 4x4 matrices when FL=4
	rep
	stosl			# zero the whole transform
	subl	$(32*FL),%edi	# back to d
	movl	$ONE,%eax
	movl	12(%esp),%edx   # s
	movl	%eax,15*FL(%edi)	# matrix[3][3] = 1
	fld1
	movl	%eax,31*FL(%edi)	# inverse[3][3] = 1
	flds	(%edx)
	fsts	(%edi)		# matrix[0][0] = s[0]
	fdivr	%st(1),%st	# 1 / s[0]
	fstps	16*FL(%edi)	# inverse[0][0]
	flds	1*FL(%edx)
	fsts	5*FL(%edi)	# matrix[1][1] = s[1]
	fdivr	%st(1),%st	# 1 / s[1]
	fstps	21*FL(%edi)	# inverse[1][1]
	flds	2*FL(%edx)
	fsts	10*FL(%edi)	# matrix[2][2] = s[2]
	fdivrp	%st,%st(1)	# 1 / s[2], consuming the constant 1 (GAS AT&T operand form)
	fstps	26*FL(%edi)	# inverse[2][2]
	popl	%edi
	ret

	.align  8
# Compute_Translation_Transform -- build a translation transform in *d.
# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
# Both matrices become identity; row 3 of the forward matrix receives s and
# row 3 of the inverse matrix receives s with the sign bit of each element
# flipped. esi/edi are preserved; eax and ecx are clobbered.
Compute_Translation_Transform:
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%edi		# d
	movl	$(8*FL),%ecx
	xorl	%eax,%eax
	rep
	stosl				# zero both matrices
	movl	$ONE,%eax
	leal	(-32*FL)(%edi),%edi	# back to d
	movl	%eax,0*FL(%edi)		# diagonal of the forward matrix
	movl	%eax,5*FL(%edi)
	movl	%eax,10*FL(%edi)
	movl	%eax,15*FL(%edi)
	movl	%eax,16*FL(%edi)	# diagonal of the inverse matrix
	movl	%eax,21*FL(%edi)
	movl	%eax,26*FL(%edi)
	movl	%eax,31*FL(%edi)
	movl	16(%esp),%esi		# s
	leal	12*FL(%edi),%edi	# &matrix[3][0]
	movb	$3,%cl			# ecx was left at 0 by rep stosl
	rep
	movsl				# matrix[3][0..2] = s
	leal	(-3*FL)(%esi),%esi	# rewind to s
	leal	13*FL(%edi),%edi	# &inverse[3][0]
	movb	$3,%cl
	rep
	movsl				# inverse[3][0..2] = s
	movb	$NEGATIVE,%al
	addb	%al,(-2*FL-1)(%edi)	# flip sign of inverse[3][0] (edi is 3 elements past it)
	addb	%al,(-FL-1)(%edi)	# flip sign of inverse[3][1]
	addb	%al,-1(%edi)		# flip sign of inverse[3][2]
	popl	%edi
	popl	%esi
	ret

	.align  8
# Compute_Rotation_Transform -- build a rotation transform in *d from three
# angles in *s, given in degrees (each is multiplied by pi/180).
# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
# The forward matrix is the product of an x-axis, a y-axis and a z-axis
# rotation (in that order); the inverse matrix is built from the transposes
# multiplied in the opposite order. The three scaled angles stay on the x87
# stack across the calls to MIdentity, MTranspose, MTimes and MITranspose
# (none of which leave anything on the x87 stack).
# Uses the static scratch matrix mat10 (and mat00 via MTimes), so it is not
# reentrant. ebx is preserved; eax, ecx, edx are clobbered.
Compute_Rotation_Transform:
	pushl	%ebx
	fldpi
	fdivs	degrees		# pi / 180
	movl	12(%esp),%ebx   # v
	flds	1*FL(%ebx)
	fmul	%st(1),%st	# y angle in radians
	flds	(%ebx)
	fmul	%st(2),%st	# x angle in radians
	flds	2*FL(%ebx)
	fmulp	%st,%st(3)	# z angle in radians replaces the factor; stack: x, y, z
	movl	8(%esp),%ebx    # transform
	pushl	%ebx
	call	MIdentity	# pops its own argument
	fsincos			# st(0) = cos x, st(1) = sin x
	fsts	5*FL(%ebx)	# m11
	fstps	10*FL(%ebx)	# m22
	fsts	6*FL(%ebx)	# m12
	fchs
	fstps	9*FL(%ebx)	# m21
	push	%ebx		# source: t->matrix
	addl	$(16*FL),%ebx   # t->inverse
	pushl	%ebx		# destination
	call	MTranspose      # zeroes eax+ecx
	addl	$8,%esp
	movl	$mat10,%eax
	pushl	%eax
	call	MIdentity
	movl	$mat10,%ebx
	fsincos			# st(0) = cos y, st(1) = sin y
	fsts	(%ebx)		# m00
	fstps	10*FL(%ebx)	# m22
	fsts	8*FL(%ebx)	# m20
	fchs
	fstps	2*FL(%ebx)	# m02
	pushl	%ebx		# s2 = mat10
	movl	(8+4)(%esp),%ebx	# d (one extra dword is on the stack)
	push	%ebx
	push	%ebx
	call	MTimes		# matrix = matrix x mat10
	addl	$12,%esp
	movl	$mat10,%eax
	pushl	%eax
	call	MITranspose	# mat10 becomes the inverse of the y rotation
	addl	$4,%esp
	movl	(4+4)(%esp),%ebx
	addl	$(16*FL),%ebx	# t->inverse
	pushl	%ebx
	movl	$mat10,%eax
	pushl	%eax
	pushl	%ebx
	call	MTimes		# inverse = mat10 x inverse
	addl	$12,%esp
	pushl	%eax		# eax is still $mat10: MTimes does not touch eax
	call	MIdentity
	fsincos			# st(0) = cos z, st(1) = sin z
	movl	$mat10,%ebx
	fsts	(%ebx)		# m00
	fstps	5*FL(%ebx)	# m11
	fsts	1*FL(%ebx)	# m01
	fchs
	fstps	4*FL(%ebx)      # m10
	pushl	%ebx
	movl	(8+4)(%esp),%ebx
	pushl	%ebx
	pushl	%ebx
	call	MTimes		# matrix = matrix x mat10
	addl	$12,%esp
	movl	$mat10,%eax
	pushl	%eax
	call	MITranspose	# mat10 becomes the inverse of the z rotation
	addl	$4,%esp
	movl	(4+4)(%esp),%ebx
	addl	$(16*FL),%ebx	# t->inverse
	pushl	%ebx
	movl	$mat10,%eax
	pushl	%eax
	pushl	%ebx
	call	MTimes		# inverse = mat10 x inverse
	addl	$12,%esp
	popl	%ebx
	ret

	.align	8
# Compute_Axis_Transform -- build a rotation about an arbitrary axis.
# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 * axis),
#                  12(%esp) = angle (float, passed straight to fsincos).
# The axis (x,y,z) is normalized first; there is no check for a zero-length
# axis. With c = cos(angle), s = sin(angle) the forward matrix is
#   m00 = x^2+(1-x^2)c   m01 = xy(1-c)+zs    m02 = xz(1-c)-ys
#   m10 = xy(1-c)-zs     m11 = y^2+(1-y^2)c  m12 = yz(1-c)+xs
#   m20 = xz(1-c)+ys     m21 = yz(1-c)-xs    m22 = z^2+(1-z^2)c
# (signs of the sine terms as assembled by GAS with its AT&T fsub/fsubr
# convention -- NOTE(review): the file header says this routine was only
# formally tested). Column 3 and row 3 are 0,0,0,1. The inverse matrix at
# offset 16*FL is the transpose of the forward matrix.
# Clobbers eax, ecx, edx; edi is preserved.
Compute_Axis_Transform:
	flds	(12)(%esp)      # angle
	fsincos			# st(0) = cos, st(1) = sin
	movl	(8)(%esp),%edx	# VCT3
	flds	(%edx)
	flds	1*FL(%edx)
	flds	2*FL(%edx)	# stack: z, y, x, cos, sin
	fld	%st
	fmul	%st,%st
	fld	%st(2)
	fmul	%st,%st
	faddp	%st,%st(1)
	fld	%st(3)
	fmul	%st,%st
	faddp	%st,%st(1)
	fsqrt			# axis length
	fld1
	fdivp	%st,%st(1)	# 1/length (GAS AT&T operand form)
	movl	(4)(%esp),%edx	# t->matrix
	xorl	%eax,%eax	# eax = 0 for the zero elements stored below
	fmul	%st,%st(3)
	fmul	%st,%st(2)
	fmulp	%st,%st(1)	# normalize
	fld	%st(2)
	fmul	%st,%st
	fld1
	fsub	%st(1),%st
	fmul	%st(5),%st
	faddp	%st,%st(1)
	fstps	(%edx)		# m00
	fld1
	fsub	%st(4),%st	# 1 - cos
	fmul	%st(2),%st
	fmul	%st(3),%st
	fld	%st(1)
	fmul	%st(6),%st
	faddp	%st,%st(1)
	fstps	1*FL(%edx)	# m01
	fld1
	fsub	%st(4),%st
	fmul	%st(3),%st
	fmul	%st(1),%st
	movl	%eax,3*FL(%edx)	# m03 = 0
	fld	%st(2)
	fmul	%st(6),%st
	fsubrp	%st,%st(1)
	fstps	2*FL(%edx)	# m02
	fld1
	fsub	%st(4),%st
	fmul	%st(3),%st
	fmul	%st(2),%st
	fld	%st(1)
	fmul	%st(6),%st
	fsubrp	%st,%st(1)
	fstps	4*FL(%edx)	# m10
	fld	%st(1)
	fmul	%st,%st
	fld1
	fsub	%st(1),%st
	fmul	%st(5),%st
	faddp	%st,%st(1)
	fstps	5*FL(%edx)	# m11
	fld1
	fsub	%st(4),%st
	fmul	%st(2),%st
	movl	%eax,7*FL(%edx)	# m13 = 0
	fmul	%st(1),%st
	fld	%st(3)
	fmul	%st(6),%st
	faddp	%st,%st(1)
	fstps	6*FL(%edx)	# m12
	fld1
	fsub	%st(4),%st
	fld	%st
	fmul	%st(4),%st
	fmul	%st(2),%st
	fld	%st(3)
	fmul	%st(7),%st
	faddp	%st,%st(1)
	fstps	8*FL(%edx)	# m20
	fmulp	%st,%st(2)	# from here on the axis components are consumed
	fmul	%st,%st(1)
	fxch	%st(2)
	fmulp	%st,%st(4)
	fsubp	%st,%st(3)
	fmul	%st,%st
	fld1
	fsub	%st(1),%st
	movl	$FL,%ecx	# count for rep stosl below (4 dwords when FL=4)
	fmulp	%st,%st(2)
	faddp	%st,%st(1)
	fstps	10*FL(%edx)	# m22
	fstps	9*FL(%edx)	# m21
	pushl	%edi
	leal	11*FL(%edx),%edi
	fld1
	rep
	stosl			# zero elements 11..14
	fstps	(%edi)		# element 15 = 1.0
	popl	%edi
	pushl	%edx            # t->matrix
	leal	16*FL(%edx),%edx
	pushl	%edx		# t->inverse
	call	MTranspose      # zeros eax+ecx
	addl	$8,%esp
	ret

	.align  8
# Compose_Transforms -- append the transform *New to *Original in place:
#   Original.matrix  = Original.matrix x New.matrix
#   Original.inverse = New.inverse x Original.inverse
# Stack arguments: 4(%esp) = Original, 8(%esp) = New.
# Relies on MTimes leaving eax and edx untouched. Clobbers eax, ecx, edx.
Compose_Transforms:
	movl	8(%esp),%eax		# New
	movl	4(%esp),%edx		# Original
	pushl	%eax			# s2
	pushl	%edx			# s1
	pushl	%edx			# d
	call	MTimes
	addl	$12,%esp
	leal	16*FL(%edx),%edx	# Original->inverse
	leal	16*FL(%eax),%eax	# New->inverse
	pushl	%edx			# s2
	pushl	%eax			# s1
	pushl	%edx			# d
	call	MTimes
	addl	$12,%esp
	ret
	# Part of povasm.asm, ported to Linux (single-precision float)
	# Compute_Axis_Transform has only been formally tested
	# requires ray.s

	FL=4 # size in bytes of one matrix/vector element (4 = float; 8 for double)
	.version "1.30"
	# 05.02.11
	.equ PII,1 # when defined, MTransNormalize compares with fucomip instead of fcoms/fnstsw/sahf
	.equ ONE,0x3F800000 # bit pattern of single-precision 1.0; change for double
	.equ NEGATIVE,0x80 # sign-bit mask for the most significant byte of an element

	.data
	.extern Epsilon # float threshold read by InvertMatrix and MTransNormalize
	# mat00..mat03: four rows of four elements; scratch result buffer of MTimes
	mat00: .fill 4,FL
	mat01: .fill 4,FL
	mat02: .fill 4,FL
	mat03: .fill 4,FL
	# mat10..mat13: four rows of four elements; scratch matrix of Compute_Rotation_Transform
	mat10: .fill 4,FL
	mat11: .fill 4,FL
	mat12: .fill 4,FL
	mat13: .fill 4,FL
	degrees:.float 180.0 # divisor used to build the factor pi/180 in Compute_Rotation_Transform

	.text
	.extern vcross #void f(VCT3 *d,*s1,*s2)
	.globl MTimes #void f(MatrixT *d,*s1,*s2)
	.globl InvertMatrix #int f(VCT3 out[3],in[3])
	.globl MIdentity #void f(MatrixT *s)
	.globl MTranspose #void f(MatrixT *d,MatrixT *s)
	.globl MITranspose #void f(MatrixT *s)
	.globl MTransPoint #void f(VCT3 *d,*s,TransformT *t)
	.globl MInvTransPoint #void f(VCT3 *d,*s,TransformT *t)
	.globl MTransDirection #void f(VCT3 *d,*s,TransformT *t)
	.globl MInvTransDirection #void f(VCT3 *d,*s,TransformT *t)
	.globl MTransNormal #void f(VCT3 *d,*s,TransformT *t)
	.globl MTransNormalize #void f(VCT3 *d,*s,TransformT *t)
	.globl MInvTransNormal #void f(VCT3 *d,*s,TransformT *t)
	.globl Create_Transform #void f(TransformT *t)
	.globl Compute_Scaling_Transform #void f(TransformT *d,VCT3 *s)
	.globl Compute_Translation_Transform #void f(TransformT *d,VCT3 *s)
	.globl Compute_Rotation_Transform #void f(TransformT *d,VCT3 *s)
	.globl Compute_Axis_Transform #void f(TransformT *d,VCT3 *s,float w)
	.globl Compose_Transforms #void f(MatrixT *Original,*New)

	.align 8
	# MTimes -- 4x4 matrix product: *d = (*s1) x (*s2), row-major, elements of FL bytes.
	# Stack arguments: 4(%esp) = d, 8(%esp) = s1, 12(%esp) = s2 (caller removes them).
	# The product is built in the static buffer mat00 and copied to d at the end,
	# so d may be the same matrix as s1 or s2. Not reentrant because of that buffer.
	# esi, edi, ebx are saved and restored; eax and edx are never touched; ecx is
	# clobbered. rep movsl is used without setting DF -- presumably DF=0 on entry.
	MTimes: pushl %esi # clobbers ecx; eax+edx free
	pushl %edi
	pushl %ebx
	movl (12+12)(%esp),%esi # esi = s2
	xorl %ecx,%ecx
	movl (8+12)(%esp),%ebx # ebx = s1
	movl $mat00,%edi # edi = current row of the scratch result
	movb $4,%cl # cl = result rows still to compute
	mat2: flds (%esi) # load row 0 of s2 ...
	flds 1*FL(%esi)
	flds 2*FL(%esi)
	flds 3*FL(%esi)
	flds (%ebx) # ... and scale it by s1[row][0]
	fmul %st,%st(4)
	movb $3,%ch # ch = remaining terms of the four dot products
	fmul %st,%st(3)
	fmul %st,%st(2)
	fmulp %st,%st(1)
	.align 4
	mat1: addl $FL,%ebx # next element of the s1 row
	addl $(4*FL),%esi # next row of s2
	flds (%ebx)
	fld %st
	fmuls (%esi)
	faddp %st,%st(5) # accumulate into result column 0
	fld %st
	fmuls 1*FL(%esi)
	faddp %st,%st(4) # column 1
	fld %st
	fmuls 2*FL(%esi)
	faddp %st,%st(3) # column 2
	decb %ch # sets ZF for the jnz below; x87 instructions leave EFLAGS alone
	fmuls 3*FL(%esi)
	faddp %st,%st(1) # column 3
	jnz mat1
	fstps 3*FL(%edi) # store the finished result row
	fstps 2*FL(%edi)
	subl $(12*FL),%esi # rewind esi to row 0 of s2
	fstps 1*FL(%edi)
	addl $FL,%ebx # ebx -> first element of the next s1 row
	fstps (%edi)
	addl $(4*FL),%edi # next scratch row
	loop mat2 # ecx = cl here because ch has counted down to 0
	movl $mat00,%esi
	movb $(4*FL),%cl # ecx = number of dwords to copy (16 when FL=4)
	popl %ebx
	movl (4+8)(%esp),%edi# result
	rep
	movsl # copy scratch buffer to d
	popl %edi
	popl %esi
	ret

	.align 8
	# InvertMatrix -- invert a 3x3 matrix stored as three consecutive 3-element vectors.
	# Stack arguments: 4(%esp) = out, 8(%esp) = in.
	# Returns eax = 1 on success, eax = 0 when |determinant| < Epsilon (out then
	# still holds the unscaled cross products).
	# vcross is external; it is called with eax = destination and edx, ecx = the
	# two source vectors, and is assumed to preserve eax, ecx and edx
	# -- TODO confirm against ray.s.
	# The determinant evaluation temporarily uses all eight x87 registers, so the
	# x87 stack must be empty on entry.
	InvertMatrix:
	movl 8(%esp),%ecx # in
	movl 4(%esp),%eax # out
	addl $(6*FL),%ecx # in[2]
	movl %ecx,%edx
	subl $(3*FL),%edx # in[1]
	call vcross
	addl $(3*FL),%edx # in[2]
	addl $(3*FL),%eax # out[1]
	subl $(6*FL),%ecx # in[0]
	call vcross
	subl $(6*FL),%edx # in[0]
	addl $(3*FL),%ecx # in[1]
	addl $(3*FL),%eax # out[2]
	call vcross
	flds (%edx) # in[0]x
	fld %st
	flds 4*FL(%edx) # in[1]y
	fmul %st,%st(2)
	flds 8*FL(%edx) # in[2]z
	fmul %st,%st(3)
	flds 1*FL(%edx) # in[0]y
	fmul %st,%st(1)
	flds 3*FL(%edx) # in[1]x
	fmul %st,%st(2)
	flds 2*FL(%edx) # in[0]z
	fmul %st,%st(1)
	flds 7*FL(%edx) # in[2]y
	fmul %st,%st(2)
	fxch %st(2)
	faddp %st,%st(7)
	fmulp %st,%st(4)
	fmulp %st,%st(4)
	flds 5*FL(%edx) # in[1]z
	fmul %st,%st(1)
	flds 6*FL(%edx) # in[2]x
	fmul %st,%st(2)
	fmulp %st,%st(4)
	fmulp %st,%st(4)
	faddp %st,%st(4) # sum of the three positive products
	fsubrp %st,%st(3) # subtract the three negative products
	fsubrp %st,%st(2)
	fsubrp %st,%st(1) # st(0) = determinant
	fst %st(1) # keep a copy of det below the top of stack
	fabs
	fcomps Epsilon # compare |det| with Epsilon and pop it
	fnstsw # status word -> ax
	sahf
	jae bgeps # |det| >= Epsilon: matrix is invertible
	fstp %st # drop det
	xorl %eax,%eax # return 0
	ret
	nop
	bgeps: fld1
	fdivp %st,%st(1) # st(0) = 1/det (as assembled by GAS for this AT&T operand form)
	movl 4(%esp),%ecx # out
	movl $8,%edx # index of the last of the nine elements
	movl $1,%eax # return 1
	dschl: flds (%ecx,%edx,4) # for double: scale 8 instead of 4
	fmul %st(1),%st
	fstps (%ecx,%edx,4) # ditto
	decl %edx
	jns dschl # loop while index >= 0
	fstp %st # drop 1/det
	ret

	.align 8 # change for double
	# MIdentity -- overwrite the 4x4 matrix at *s with the identity matrix.
	# Stack argument: 4(%esp) = s. The routine removes its own argument (ret $4).
	# ebx and edx are left alone; on return eax = ONE and ecx = 0.
	MIdentity:
	pushl %edi
	movl 8(%esp),%edi # s (4 bytes saved edi + return address)
	xorl %eax,%eax
	movl $(4*FL-1),%ecx # 15 elements get zeroed, the 16th is set below
	rep
	stosl
	subl $(15*FL),%edi # back to element 0
	movl $ONE,%eax
	movl %eax,15*FL(%edi) # the diagonal: elements 15, 10, 5, 0
	movl %eax,10*FL(%edi)
	movl %eax,5*FL(%edi)
	movl %eax,0*FL(%edi)
	popl %edi
	ret $4

	.align 8
	# Create_Transform -- set both matrices of *t (forward at offset 0, inverse at
	# offset 16*FL) to identity.
	# Stack argument: 4(%esp) = t. MIdentity pops each argument pushed here and
	# does not modify edx.
	Create_Transform:
	movl 4(%esp),%edx # edx = t
	pushl %edx
	call MIdentity # t->matrix
	leal 16*FL(%edx),%edx # edx = t->inverse
	pushl %edx
	call MIdentity # t->inverse
	ret

	.align 8
	# MTranspose -- *d = transpose of *s for 4x4 matrices of 4-byte elements.
	# Stack arguments: 4(%esp) = d, 8(%esp) = s. d and s must be distinct matrices.
	# esi/edi are preserved; eax and ecx are both 0 on return.
	# For double elements a second movsl per element would be needed.
	MTranspose:
	pushl %edi
	pushl %esi
	movl 12(%esp),%edi # d
	movl 16(%esp),%esi # s
	movl $4,%eax # source rows left
	.Lmtr2_row:
	movl $4,%ecx # elements left in this source row
	.Lmtr2_elem:
	movsl # d[c][r] = s[r][c]
	addl $(3*FL),%edi # together with movsl: one full row down in d
	decl %ecx
	jnz .Lmtr2_elem
	subl $(15*FL),%edi # top of the next column of d
	decl %eax
	jnz .Lmtr2_row
	popl %esi
	popl %edi
	ret

	.align 8 # change for double
	# MITranspose -- transpose the 4x4 matrix at *s in place.
	# Stack argument: 4(%esp) = s. Clobbers eax and edx.
	# Written for 4-byte elements (change for double).
	# MIT_SWAP exchanges element \lo with element \hi through eax; the macro is
	# purged again after the routine.
	.macro MIT_SWAP lo, hi
	movl \hi*FL(%edx),%eax
	xchgl %eax,\lo*FL(%edx)
	movl %eax,\hi*FL(%edx)
	.endm
	MITranspose:
	movl 4(%esp),%edx # s
	MIT_SWAP 1, 4
	MIT_SWAP 2, 8
	MIT_SWAP 3, 12
	MIT_SWAP 6, 9
	MIT_SWAP 7, 13
	MIT_SWAP 11, 14
	ret
	.purgem MIT_SWAP

	.align 8
	# MTransPoint -- transform a point with the forward matrix m at offset 0 of *t:
	#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j] + m[3][j],   j = 0..2
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# s is loaded completely before d is written, so d may alias s.
	# No integer register is modified; the x87 stack depth is unchanged on return.
	MTransPoint:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # s[0]*m[0][0]
	flds 1*FL(%ecx)
	fmul %st(4),%st # s[0]*m[0][1]
	flds 2*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes s[0]*m[0][2]
	flds 4*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + s[1]*m[1][0]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + s[1]*m[1][1]
	flds 6*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes s[1]*m[1][2]
	flds 8*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + s[2]*m[2][0]
	flds 9*FL(%ecx)
	fmul %st(3),%st
	# movl 4(%esp),%eax # d
	faddp %st,%st(1) # + s[2]*m[2][1]
	flds 10*FL(%ecx)
	fmulp %st,%st(3) # s[2] slot becomes s[2]*m[2][2]
	fadds 13*FL(%ecx) # + translation m[3][1]
	fstps 1*FL(%eax) # d[1]
	fadds 12*FL(%ecx) # + translation m[3][0]
	fstps (%eax) # d[0]
	faddp %st,%st(1) # sum the three partial products of d[2]
	fadds 14*FL(%ecx) # + translation m[3][2]
	faddp %st,%st(1)
	fstps 2*FL(%eax) # d[2]
	ret

	.align 8
	# MInvTransPoint -- transform a point with the inverse matrix m at offset
	# 16*FL of *t:
	#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j] + m[3][j],   j = 0..2
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# ecx is advanced to t->inverse and EFLAGS are changed by that addl.
	# s is loaded completely before d is written, so d may alias s.
	MInvTransPoint:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	addl $(16*FL),%ecx # t->inverse
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # s[0]*m[0][0]
	flds 1*FL(%ecx)
	fmul %st(4),%st # s[0]*m[0][1]
	flds 2*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes s[0]*m[0][2]
	flds 4*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + s[1]*m[1][0]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + s[1]*m[1][1]
	flds 6*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes s[1]*m[1][2]
	flds 8*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + s[2]*m[2][0]
	flds 9*FL(%ecx)
	fmul %st(3),%st
	# movl 4(%esp),%eax # d
	faddp %st,%st(1) # + s[2]*m[2][1]
	fadds 13*FL(%ecx) # + translation m[3][1]
	fstps 1*FL(%eax) # d[1]
	fadds 12*FL(%ecx) # + translation m[3][0]
	fstps (%eax) # d[0]
	fmuls 10*FL(%ecx) # s[2]*m[2][2]
	faddp %st,%st(1)
	fadds 14*FL(%ecx) # + translation m[3][2]
	faddp %st,%st(1)
	fstps 2*FL(%eax) # d[2]
	ret

	.align 8
	# MTransDirection -- transform a direction with the forward matrix m at offset
	# 0 of *t (same as MTransPoint but without the translation row):
	#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j],   j = 0..2
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# s is loaded completely before d is written, so d may alias s.
	# No integer register is modified.
	MTransDirection:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # s[0]*m[0][0]
	flds 1*FL(%ecx)
	fmul %st(4),%st # s[0]*m[0][1]
	flds 2*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes s[0]*m[0][2]
	flds 4*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + s[1]*m[1][0]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + s[1]*m[1][1]
	flds 6*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes s[1]*m[1][2]
	flds 8*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + s[2]*m[2][0]
	flds 9*FL(%ecx)
	fmul %st(3),%st
	# movl 4(%esp),%eax # d
	faddp %st,%st(1) # + s[2]*m[2][1]
	flds 10*FL(%ecx)
	fmulp %st,%st(3) # s[2] slot becomes s[2]*m[2][2]
	fstps 1*FL(%eax) # d[1]
	fstps (%eax) # d[0]
	faddp %st,%st(1) # sum the three partial products of d[2]
	faddp %st,%st(1)
	fstps 2*FL(%eax) # d[2]
	ret

	.align 8
	# MInvTransDirection -- transform a direction with the inverse matrix m at
	# offset 16*FL of *t (no translation):
	#   d[j] = s[0]*m[0][j] + s[1]*m[1][j] + s[2]*m[2][j],   j = 0..2
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# ecx is advanced to t->inverse (with leal, so EFLAGS are not changed).
	# s is loaded completely before d is written, so d may alias s.
	MInvTransDirection:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	leal 16*FL(%ecx),%ecx# t->inverse
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # s[0]*m[0][0]
	flds 1*FL(%ecx)
	fmul %st(4),%st # s[0]*m[0][1]
	flds 2*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes s[0]*m[0][2]
	flds 4*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + s[1]*m[1][0]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + s[1]*m[1][1]
	flds 6*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes s[1]*m[1][2]
	flds 8*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + s[2]*m[2][0]
	flds 9*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(1) # + s[2]*m[2][1]
	flds 10*FL(%ecx)
	fmulp %st,%st(3) # s[2] slot becomes s[2]*m[2][2]
	# movl 4(%esp),%eax # d
	fstps 1*FL(%eax) # d[1]
	fstps (%eax) # d[0]
	faddp %st,%st(1) # sum the three partial products of d[2]
	faddp %st,%st(1)
	fstps 2*FL(%eax) # d[2]
	ret

	.align 8
	# MTransNormal -- transform a normal with the transpose of the inverse matrix
	# m at offset 16*FL of *t:
	#   d[i] = m[i][0]*s[0] + m[i][1]*s[1] + m[i][2]*s[2],   i = 0..2
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# ecx is advanced to t->inverse and EFLAGS are changed by that addl.
	# s is loaded completely before d is written, so d may alias s.
	MTransNormal:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	addl $(16*FL),%ecx # t->inverse
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # m[0][0]*s[0]
	flds 4*FL(%ecx)
	fmul %st(4),%st # m[1][0]*s[0]
	flds 8*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes m[2][0]*s[0]
	flds 1*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + m[0][1]*s[1]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + m[1][1]*s[1]
	flds 9*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes m[2][1]*s[1]
	flds 2*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + m[0][2]*s[2]
	flds 6*FL(%ecx)
	fmul %st(3),%st
	# movl 4(%esp),%eax # d
	faddp %st,%st(1) # + m[1][2]*s[2]
	flds 10*FL(%ecx)
	fmulp %st,%st(3) # s[2] slot becomes m[2][2]*s[2]
	fstps 1*FL(%eax) # d[1]
	fstps (%eax) # d[0]
	faddp %st,%st(1) # sum the three partial products of d[2]
	faddp %st,%st(1)
	fstps 2*FL(%eax) # d[2]
	ret

	.align 8
	# MTransNormalize -- like MTransNormal (multiply s by the transpose of the
	# inverse matrix at offset 16*FL of *t), then scale the result to unit length.
	# If the length is below Epsilon the result is set to (1, 0, 0) instead.
	# Register arguments (the stack loads are commented out):
	#   eax = d (VCT3 *), edx = s (VCT3 *), ecx = t (TransformT *).
	# ecx is overwritten (first t->inverse, later a copy of d). With PII undefined
	# the fnstsw path also overwrites ax, which is why d is copied to ecx first.
	# The x87 stack depth is unchanged on both exit paths.
	MTransNormalize:
	# movl 8(%esp),%edx # s
	# movl 12(%esp),%ecx # t
	flds (%edx) # s[0]
	addl $(16*FL),%ecx # t->inverse
	flds 1*FL(%edx) # s[1]
	flds 2*FL(%edx) # s[2]
	flds (%ecx)
	fmul %st(3),%st # m[0][0]*s[0]
	flds 4*FL(%ecx)
	fmul %st(4),%st # m[1][0]*s[0]
	flds 8*FL(%ecx)
	fmulp %st,%st(5) # s[0] slot becomes m[2][0]*s[0]
	flds 1*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(2) # + m[0][1]*s[1]
	flds 5*FL(%ecx)
	fmul %st(4),%st
	faddp %st,%st(1) # + m[1][1]*s[1]
	flds 9*FL(%ecx)
	fmulp %st,%st(4) # s[1] slot becomes m[2][1]*s[1]
	flds 2*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(2) # + m[0][2]*s[2]
	flds 6*FL(%ecx)
	fmul %st(3),%st
	faddp %st,%st(1) # + m[1][2]*s[2]
	flds 10*FL(%ecx)
	fmulp %st,%st(3) # s[2] slot becomes m[2][2]*s[2]
	fld %st # stack: ny, nx, three partial products of nz
	fmul %st,%st # ny^2
	fxch %st(5)
	faddp %st,%st(3)
	fxch %st(3)
	faddp %st,%st(2) # nz complete
	fld %st
	fmul %st,%st # nx^2
	faddp %st,%st(4) # nx^2 + ny^2
	fld %st(1)
	fmul %st,%st # nz^2
	fadd %st(4),%st
	fsqrt # st(0) = length; below it: nx, nz, ny, nx^2+ny^2
	# movl 4(%esp),%ecx # d
	movl %eax,%ecx # for regparm only
	.ifndef PII
	fcoms Epsilon
	fnstsw
	sahf
	fld1
	jbe tfehl
	nop
	.else
	flds Epsilon
	fucomip %st(1),%st # compare Epsilon with length, pop Epsilon
	fld1
	ja tfehl # Epsilon > length: vector too short to normalize
	.endif
	fdivp %st,%st(1) # st(0) = 1/length (as assembled by GAS for this AT&T operand form)
	fmul %st,%st(3) # ny /= length
	fmul %st,%st(2) # nz /= length
	fmulp %st,%st(1) # nx /= length
	fstps (%ecx) # d[0]
	fstps 2*FL(%ecx) # d[2]
	fstps 1*FL(%ecx) # d[1]
	fstp %st # drop nx^2+ny^2
	ret
	tfehl: fstps (%ecx) # d[0] = 1.0 (the value just loaded by fld1)
	fucompp # discard the five remaining temporaries
	fucompp
	fstp %st
	fldz
	fsts 1*FL(%ecx) # d[1] = 0
	fstps 2*FL(%ecx) # d[2] = 0
	ret

	.align 8
	# Compute_Scaling_Transform -- build a scaling transform in *d from vector *s.
	# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
	# Forward matrix (offset 0):       diagonal s[0], s[1], s[2], 1.
	# Inverse matrix (offset 16*FL):   diagonal 1/s[0], 1/s[1], 1/s[2], 1.
	# All other elements are zeroed. No check is made for zero scale factors.
	# Clobbers eax, ecx, edx; edi is preserved. rep stosl uses the current DF.
	Compute_Scaling_Transform:
	pushl %edi
	xorl %eax,%eax
	movl (4+4)(%esp),%edi# d
	movl $(8*FL),%ecx # 32 dwords = both 4x4 matrices when FL=4
	rep
	stosl # zero the whole transform
	subl $(32*FL),%edi # back to d
	movl $ONE,%eax
	movl 12(%esp),%edx # s
	movl %eax,15*FL(%edi) # matrix[3][3] = 1
	fld1
	movl %eax,31*FL(%edi) # inverse[3][3] = 1
	flds (%edx)
	fsts (%edi) # matrix[0][0] = s[0]
	fdivr %st(1),%st # 1 / s[0]
	fstps 16*FL(%edi) # inverse[0][0]
	flds 1*FL(%edx)
	fsts 5*FL(%edi) # matrix[1][1] = s[1]
	fdivr %st(1),%st # 1 / s[1]
	fstps 21*FL(%edi) # inverse[1][1]
	flds 2*FL(%edx)
	fsts 10*FL(%edi) # matrix[2][2] = s[2]
	fdivrp %st,%st(1) # 1 / s[2], consuming the constant 1 (GAS AT&T operand form)
	fstps 26*FL(%edi) # inverse[2][2]
	popl %edi
	ret

	.align 8
	# Compute_Translation_Transform -- build a translation transform in *d.
	# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
	# Both matrices become identity; row 3 of the forward matrix receives s and
	# row 3 of the inverse matrix receives s with the sign bit of each element
	# flipped. esi/edi are preserved; eax and ecx are clobbered.
	Compute_Translation_Transform:
	pushl %esi
	pushl %edi
	movl 12(%esp),%edi # d
	movl $(8*FL),%ecx
	xorl %eax,%eax
	rep
	stosl # zero both matrices
	movl $ONE,%eax
	leal (-32*FL)(%edi),%edi # back to d
	movl %eax,0*FL(%edi) # diagonal of the forward matrix
	movl %eax,5*FL(%edi)
	movl %eax,10*FL(%edi)
	movl %eax,15*FL(%edi)
	movl %eax,16*FL(%edi) # diagonal of the inverse matrix
	movl %eax,21*FL(%edi)
	movl %eax,26*FL(%edi)
	movl %eax,31*FL(%edi)
	movl 16(%esp),%esi # s
	leal 12*FL(%edi),%edi # &matrix[3][0]
	movb $3,%cl # ecx was left at 0 by rep stosl
	rep
	movsl # matrix[3][0..2] = s
	leal (-3*FL)(%esi),%esi # rewind to s
	leal 13*FL(%edi),%edi # &inverse[3][0]
	movb $3,%cl
	rep
	movsl # inverse[3][0..2] = s
	movb $NEGATIVE,%al
	addb %al,(-2*FL-1)(%edi) # flip sign of inverse[3][0] (edi is 3 elements past it)
	addb %al,(-FL-1)(%edi) # flip sign of inverse[3][1]
	addb %al,-1(%edi) # flip sign of inverse[3][2]
	popl %edi
	popl %esi
	ret

	.align 8
	# Compute_Rotation_Transform -- build a rotation transform in *d from three
	# angles in *s, given in degrees (each is multiplied by pi/180).
	# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 *).
	# The forward matrix is the product of an x-axis, a y-axis and a z-axis
	# rotation (in that order); the inverse matrix is built from the transposes
	# multiplied in the opposite order. The three scaled angles stay on the x87
	# stack across the calls to MIdentity, MTranspose, MTimes and MITranspose.
	# Uses the static scratch matrix mat10 (and mat00 via MTimes), so it is not
	# reentrant. ebx is preserved; eax, ecx, edx are clobbered.
	Compute_Rotation_Transform:
	pushl %ebx
	fldpi
	fdivs degrees # pi / 180
	movl 12(%esp),%ebx # v
	flds 1*FL(%ebx)
	fmul %st(1),%st # y angle in radians
	flds (%ebx)
	fmul %st(2),%st # x angle in radians
	flds 2*FL(%ebx)
	fmulp %st,%st(3) # z angle in radians replaces the factor; stack: x, y, z
	movl 8(%esp),%ebx # transform
	pushl %ebx
	call MIdentity # pops its own argument
	fsincos # st(0) = cos x, st(1) = sin x
	fsts 5*FL(%ebx) # m11
	fstps 10*FL(%ebx) # m22
	fsts 6*FL(%ebx) # m12
	fchs
	fstps 9*FL(%ebx) # m21
	push %ebx # source: t->matrix
	addl $(16*FL),%ebx # t->inverse
	pushl %ebx # destination
	call MTranspose # zeroes eax+ecx
	addl $8,%esp
	movl $mat10,%eax
	pushl %eax
	call MIdentity
	movl $mat10,%ebx
	fsincos # st(0) = cos y, st(1) = sin y
	fsts (%ebx) # m00
	fstps 10*FL(%ebx) # m22
	fsts 8*FL(%ebx) # m20
	fchs
	fstps 2*FL(%ebx) # m02
	pushl %ebx # s2 = mat10
	movl (8+4)(%esp),%ebx # d (one extra dword is on the stack)
	push %ebx
	push %ebx
	call MTimes # matrix = matrix x mat10
	addl $12,%esp
	movl $mat10,%eax
	pushl %eax
	call MITranspose # mat10 becomes the inverse of the y rotation
	addl $4,%esp
	movl (4+4)(%esp),%ebx
	addl $(16*FL),%ebx # t->inverse
	pushl %ebx
	movl $mat10,%eax
	pushl %eax
	pushl %ebx
	call MTimes # inverse = mat10 x inverse
	addl $12,%esp
	pushl %eax # eax is still $mat10: MTimes does not touch eax
	call MIdentity
	fsincos # st(0) = cos z, st(1) = sin z
	movl $mat10,%ebx
	fsts (%ebx) # m00
	fstps 5*FL(%ebx) # m11
	fsts 1*FL(%ebx) # m01
	fchs
	fstps 4*FL(%ebx) # m10
	pushl %ebx
	movl (8+4)(%esp),%ebx
	pushl %ebx
	pushl %ebx
	call MTimes # matrix = matrix x mat10
	addl $12,%esp
	movl $mat10,%eax
	pushl %eax
	call MITranspose # mat10 becomes the inverse of the z rotation
	addl $4,%esp
	movl (4+4)(%esp),%ebx
	addl $(16*FL),%ebx # t->inverse
	pushl %ebx
	movl $mat10,%eax
	pushl %eax
	pushl %ebx
	call MTimes # inverse = mat10 x inverse
	addl $12,%esp
	popl %ebx
	ret

	.align 8
	# Compute_Axis_Transform -- build a rotation about an arbitrary axis.
	# Stack arguments: 4(%esp) = d (TransformT *), 8(%esp) = s (VCT3 * axis),
	#                  12(%esp) = angle (float, passed straight to fsincos).
	# The axis (x,y,z) is normalized first; there is no check for a zero-length
	# axis. With c = cos(angle), s = sin(angle) the forward matrix is
	#   m00 = x^2+(1-x^2)c   m01 = xy(1-c)+zs    m02 = xz(1-c)-ys
	#   m10 = xy(1-c)-zs     m11 = y^2+(1-y^2)c  m12 = yz(1-c)+xs
	#   m20 = xz(1-c)+ys     m21 = yz(1-c)-xs    m22 = z^2+(1-z^2)c
	# (signs of the sine terms as assembled by GAS with its AT&T fsub/fsubr
	# convention -- NOTE(review): the file header says this routine was only
	# formally tested). Column 3 and row 3 are 0,0,0,1. The inverse matrix at
	# offset 16*FL is the transpose of the forward matrix.
	# Clobbers eax, ecx, edx; edi is preserved.
	Compute_Axis_Transform:
	flds (12)(%esp) # angle
	fsincos # st(0) = cos, st(1) = sin
	movl (8)(%esp),%edx # VCT3
	flds (%edx)
	flds 1*FL(%edx)
	flds 2*FL(%edx) # stack: z, y, x, cos, sin
	fld %st
	fmul %st,%st
	fld %st(2)
	fmul %st,%st
	faddp %st,%st(1)
	fld %st(3)
	fmul %st,%st
	faddp %st,%st(1)
	fsqrt # axis length
	fld1
	fdivp %st,%st(1) # 1/length (GAS AT&T operand form)
	movl (4)(%esp),%edx # t->matrix
	xorl %eax,%eax # eax = 0 for the zero elements stored below
	fmul %st,%st(3)
	fmul %st,%st(2)
	fmulp %st,%st(1) # normalize
	fld %st(2)
	fmul %st,%st
	fld1
	fsub %st(1),%st
	fmul %st(5),%st
	faddp %st,%st(1)
	fstps (%edx) # m00
	fld1
	fsub %st(4),%st # 1 - cos
	fmul %st(2),%st
	fmul %st(3),%st
	fld %st(1)
	fmul %st(6),%st
	faddp %st,%st(1)
	fstps 1*FL(%edx) # m01
	fld1
	fsub %st(4),%st
	fmul %st(3),%st
	fmul %st(1),%st
	movl %eax,3*FL(%edx) # m03 = 0
	fld %st(2)
	fmul %st(6),%st
	fsubrp %st,%st(1)
	fstps 2*FL(%edx) # m02
	fld1
	fsub %st(4),%st
	fmul %st(3),%st
	fmul %st(2),%st
	fld %st(1)
	fmul %st(6),%st
	fsubrp %st,%st(1)
	fstps 4*FL(%edx) # m10
	fld %st(1)
	fmul %st,%st
	fld1
	fsub %st(1),%st
	fmul %st(5),%st
	faddp %st,%st(1)
	fstps 5*FL(%edx) # m11
	fld1
	fsub %st(4),%st
	fmul %st(2),%st
	movl %eax,7*FL(%edx) # m13 = 0
	fmul %st(1),%st
	fld %st(3)
	fmul %st(6),%st
	faddp %st,%st(1)
	fstps 6*FL(%edx) # m12
	fld1
	fsub %st(4),%st
	fld %st
	fmul %st(4),%st
	fmul %st(2),%st
	fld %st(3)
	fmul %st(7),%st
	faddp %st,%st(1)
	fstps 8*FL(%edx) # m20
	fmulp %st,%st(2) # from here on the axis components are consumed
	fmul %st,%st(1)
	fxch %st(2)
	fmulp %st,%st(4)
	fsubp %st,%st(3)
	fmul %st,%st
	fld1
	fsub %st(1),%st
	movl $FL,%ecx # count for rep stosl below (4 dwords when FL=4)
	fmulp %st,%st(2)
	faddp %st,%st(1)
	fstps 10*FL(%edx) # m22
	fstps 9*FL(%edx) # m21
	pushl %edi
	leal 11*FL(%edx),%edi
	fld1
	rep
	stosl # zero elements 11..14
	fstps (%edi) # element 15 = 1.0
	popl %edi
	pushl %edx # t->matrix
	leal 16*FL(%edx),%edx
	pushl %edx # t->inverse
	call MTranspose # zeros eax+ecx
	addl $8,%esp
	ret

	.align 8
	# Compose_Transforms -- append the transform *New to *Original in place:
	#   Original.matrix  = Original.matrix x New.matrix
	#   Original.inverse = New.inverse x Original.inverse
	# Stack arguments: 4(%esp) = Original, 8(%esp) = New.
	# Relies on MTimes leaving eax and edx untouched. Clobbers eax, ecx, edx.
	Compose_Transforms:
	movl 8(%esp),%eax # New
	movl 4(%esp),%edx # Original
	pushl %eax # s2
	pushl %edx # s1
	pushl %edx # d
	call MTimes
	addl $12,%esp
	leal 16*FL(%edx),%edx # Original->inverse
	leal 16*FL(%eax),%eax # New->inverse
	pushl %edx # s2
	pushl %eax # s1
	pushl %edx # d
	call MTimes
	addl $12,%esp
	ret