All assembly code from thargor6/mb3d project
| // FastLocateByte: scans the bytes Where[Start .. BSize-1] for the low byte of What. | |
| // Returns the zero-based offset of the first match, measured from Where, | |
| // or -1 when nothing matches or the range is empty (BSize <= Start). | |
| // All parameters live on the stack (pascal convention) and are addressed by name. | |
| function FastLocateByte(const Where; Start, BSize: Integer; What: Word): Integer; assembler; pascal; | |
| asm | |
| push edi // edi is changed by scasb and must be restored | |
| mov ecx, [bsize] | |
| sub ecx, [start] // ecx = number of bytes to scan | |
| jle @notfound // empty or inverted range: a negative count must not reach repne | |
| mov edi, [where] // untyped const parameter: the stack slot holds the buffer address | |
| add edi, [start] | |
| mov ax, [what] // scasb compares against al only | |
| repne scasb // stops after the first byte equal to al, or when ecx reaches 0 | |
| je @found | |
| @notfound: | |
| mov eax, -1 | |
| jmp @end | |
| @found: | |
| mov eax, edi // edi points one byte past the match | |
| dec eax | |
| sub eax, [where] // address -> offset from Where | |
| @end: | |
| pop edi | |
| end; | |
| // FastLocate2Bytes: scans the bytes Where[Start .. BSize-1] for the byte pair held in What | |
| // (low byte of What first, high byte second, i.e. What as it is laid out in memory). | |
| // Returns the zero-based offset of the first byte of the pair, measured from Where, | |
| // or -1 when the pair does not occur completely inside the range. | |
| // All parameters live on the stack (pascal convention) and are addressed by name. | |
| function FastLocate2Bytes(const where; start, bsize: integer; what: word):integer; assembler; pascal; far; | |
| asm | |
| push edi // edi is changed by scasb and must be restored | |
| mov ecx, [bsize] | |
| sub ecx, [start] // ecx = number of bytes to scan | |
| jle @notfound // empty or inverted range: a negative count must not reach repne | |
| mov edi, [where] // untyped const parameter: the stack slot holds the buffer address | |
| add edi, [start] | |
| mov ax, [what] // al = first byte of the pair, ah = second byte | |
| @search: | |
| repne scasb // look for the first byte | |
| jne @notfound // range exhausted without a match | |
| test ecx, ecx // first byte matched on the last byte of the range: | |
| jz @notfound // the second byte would lie outside the buffer, do not read it | |
| cmp [edi], ah // edi already points at the byte after the match | |
| jne @search // ecx > 0 here, so the scan resumes with fresh flags | |
| mov eax, edi | |
| dec eax // back to the first byte of the pair | |
| sub eax, [where] // address -> offset from Where | |
| jmp @end | |
| @notfound: | |
| mov eax, -1 | |
| @end: | |
| pop edi | |
| end; | |
| function FastLocateDWord(var Where; BSize: Integer; What: LongInt): Integer; assembler; register; { Returns the zero-based index of the first dword equal to What among BSize dwords at Where, or -1 } | |
| asm { in: eax = @Where, edx = BSize, ecx = What; out: eax } | |
| push edi | |
| mov edi, eax { edi = scan pointer used by scasd } | |
| mov eax, ecx { eax = value scasd compares against } | |
| mov ecx, edx { ecx = repeat count: BSize is consumed as a number of dwords, not bytes } | |
| mov edx, edi { edx = start address, kept for the index computation } | |
| @search: | |
| repne scasd | |
| je @found { with BSize = 0 the flags are stale here, but the @found path then also yields -1 } | |
| @notfound: | |
| mov eax, -1 | |
| jmp @end | |
| @found: | |
| mov eax, edi { edi points one dword past the match } | |
| sub eax, edx | |
| shr eax, 2 { byte distance to dword count } | |
| dec eax | |
| @end: | |
| pop edi | |
| end; | |
| procedure ZeroMem( var dest; sizeof: integer ); assembler; register; | |
| asm { in: eax = @dest, edx = sizeof in bytes; writes sizeof zero bytes at dest } | |
| push edi { edi is the store pointer and must be restored } | |
| mov edi, eax | |
| mov ecx, edx | |
| shr ecx, 2 { number of whole dwords } | |
| xor eax, eax { fill value 0 } | |
| rep stosd | |
| test dl, 2 { bit 1 of the size: two trailing bytes } | |
| jz @tailbyte | |
| stosw | |
| @tailbyte: | |
| test dl, 1 { bit 0 of the size: one trailing byte } | |
| jz @done | |
| stosb | |
| @done: | |
| pop edi | |
| end; | |
| procedure FillDWord(var Dest; Count: Integer; Value: Cardinal); assembler; register; { Stores Value into Count consecutive dwords starting at Dest } | |
| asm { in: eax = @Dest, edx = Count, ecx = Value } | |
| push edi // protect edi | |
| mov edi, eax // edi=@dest | |
| mov eax, ecx // eax=Value | |
| mov ecx, edx { ecx = repeat count for stosd } | |
| rep stosd | |
| pop edi | |
| end; | |
| procedure FastFillChar( var dest; sizeof: integer; fill: byte ); assembler; register; | |
| asm { in: eax = @dest, edx = sizeof in bytes, cl = fill; writes sizeof copies of fill at dest } | |
| push edi { edi is the store pointer and must be restored } | |
| mov edi, eax | |
| movzx eax, cl | |
| imul eax, eax, $01010101 { replicate the fill byte into all four bytes of eax } | |
| mov ecx, edx | |
| shr ecx, 2 { number of whole dwords } | |
| rep stosd | |
| test dl, 2 { bit 1 of the size: two trailing bytes } | |
| jz @tailbyte | |
| stosw | |
| @tailbyte: | |
| test dl, 1 { bit 0 of the size: one trailing byte } | |
| jz @done | |
| stosb | |
| @done: | |
| pop edi | |
| end; | |
| function GetSwap2(A: Word): Word; assembler; register; | |
| asm { in: ax = A; out: ax = A with its two bytes exchanged } | |
| xchg al, ah | |
| end; | |
| procedure Swap4(var A: Cardinal); assembler; register; { Reverses the byte order of the dword A in place } | |
| asm { in: eax = @A } | |
| mov ecx, [eax] | |
| bswap ecx | |
| mov [eax], ecx | |
| end; | |
| function GetSwap4(A: Cardinal): Cardinal; assembler; register; { Returns A with its four bytes in reverse order } | |
| asm { in: eax = A; out: eax } | |
| bswap eax | |
| end; | |
| procedure SwapDWords(var A,B); assembler; register; | |
| asm { in: eax = @A, edx = @B; exchanges the dword at A with the dword at B } | |
| push dword ptr [eax] { park the old A on the stack } | |
| mov ecx, [edx] | |
| mov [eax], ecx { A := B } | |
| pop dword ptr [edx] { B := old A } | |
| end; | |
| function NotZeroSVec(sv: TPSVec): LongBool; // in: eax = sv; out: eax = 0 when the three dwords at sv are all zero bits, else $FFFFFFFF | |
| asm | |
| mov edx, [eax + 8] | |
| or edx, [eax + 4] | |
| or edx, [eax] { edx = bitwise OR of the three dwords } | |
| neg edx { CF = 1 exactly when edx was non-zero } | |
| sbb eax, eax { eax = 0 - CF: 0 or $FFFFFFFF } | |
| end; | |
| procedure Clamp0SvecSSE(sv1: TPSVec); { Replaces each of the four singles at sv1 by max(value, 0); uses SSE without checking SupportSSE } | |
| asm { in: eax = sv1 } | |
| movups xmm0, [eax] | |
| xorps xmm1, xmm1 { xmm1 = four zeros } | |
| maxps xmm0, xmm1 | |
| movups [eax], xmm0 | |
| end; | |
| procedure FlipVecs(V1, V2: TPVec3D); { Exchanges the three doubles at V1 with the three doubles at V2 } | |
| asm { in: eax = V1, edx = V2; uses six FPU stack slots } | |
| fld qword [eax] | |
| fld qword [eax + 8] | |
| fld qword [eax + 16] | |
| fld qword [edx] | |
| fld qword [edx + 8] | |
| fld qword [edx + 16] { stack, top first: V2[2], V2[1], V2[0], V1[2], V1[1], V1[0] } | |
| fstp qword [eax + 16] { old V2 values go to V1 } | |
| fstp qword [eax + 8] | |
| fstp qword [eax] | |
| fstp qword [edx + 16] { old V1 values go to V2 } | |
| fstp qword [edx + 8] | |
| fstp qword [edx] | |
| end; | |
| function YofSVec(sv: TPSVec): Single; { Weighted sum of the first three singles at sv, replaced by 0 when the sum is negative; result in st(0) } | |
| asm // Result := sv[0] * s03 + sv[1] * s059 + sv[2] * s011; | |
| fld dword [eax] | |
| fmul s03 | |
| fld dword [eax + 4] | |
| fmul s059 | |
| faddp | |
| fld dword [eax + 8] | |
| fmul s011 | |
| faddp | |
| ftst { compare the sum with 0.0 } | |
| fnstsw ax { the pointer in eax is no longer needed } | |
| shr ah, 1 { CF := C0 (bit 0 of ah), set when st(0) < 0 or unordered } | |
| jnc @@1 | |
| fstp st { negative: discard the sum } | |
| fldz { and return 0 instead } | |
| @@1: | |
| end; | |
| function MaxOfSVec(sv: TPSVec): Single; { Returns the largest of the first three singles at sv in st(0) } | |
| asm { in: eax = sv; SSE path when SupportSSE is non-zero, FPU path otherwise } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| push edx { reserves a 4-byte stack slot for the xmm to FPU transfer; the pushed value is irrelevant } | |
| movss xmm0, [eax] | |
| maxss xmm0, [eax + 4] | |
| maxss xmm0, [eax + 8] | |
| movss [esp], xmm0 | |
| fld dword [esp] | |
| pop edx { release the slot } | |
| ret { NOTE(review): bare ret assumes the compiler emits no entry code for this routine - confirm } | |
| @@1: | |
| mov edx, eax { keep the pointer: fnstsw overwrites ax } | |
| fld dword [eax] | |
| fcom dword [edx + 4] | |
| fnstsw ax | |
| and ah, 41H { isolate C3 and C0: both clear means st(0) is greater than the operand } | |
| jz @@up1 | |
| fstp st { otherwise take the operand as the new maximum } | |
| fld dword [edx + 4] | |
| @@up1: | |
| fcom dword [edx + 8] | |
| fnstsw ax | |
| and ah, 41H | |
| jz @@up2 | |
| fstp st | |
| fld dword [edx + 8] | |
| @@up2: | |
| end; | |
| function D7Bequal(d1, d2: Double7B): LongBool; { Compares seven bytes of d1 and d2; returns $FFFFFFFF when all are equal, otherwise 0 } | |
| asm { eax and edx are dereferenced below, i.e. they hold the addresses of d1 and d2 } | |
| push ecx | |
| mov ecx, [eax] { bytes 0..3 } | |
| cmp ecx, [edx] | |
| jne @@1 | |
| mov cx, [eax + 4] { bytes 4..5 } | |
| cmp cx, word [edx + 4] | |
| jne @@1 | |
| mov cl, [eax + 6] { byte 6 } | |
| cmp cl, byte [edx + 6] | |
| jne @@1 | |
| mov eax, $FFFFFFFF | |
| jmp @@2 | |
| @@1: | |
| xor eax, eax | |
| @@2: | |
| pop ecx | |
| end; | |
| function D7BtoDouble(const D7B: Double7B): Double; { Builds a double whose bytes 1..7 are the seven bytes of D7B and whose byte 0 is zero; result in st(0) } | |
| asm { in: eax = address of D7B } | |
| add esp, -8 { reserve an 8-byte temporary on the stack } | |
| xor edx, edx | |
| mov [esp], edx { clear bytes 0..3 so that byte 0 stays zero } | |
| mov edx, [eax] | |
| mov [esp + 1], edx { D7B bytes 0..3 to temporary bytes 1..4 } | |
| mov edx, [eax + 3] | |
| mov [esp + 4], edx { D7B bytes 3..6 to temporary bytes 4..7; byte 4 is rewritten with the same value } | |
| fld qword [esp] | |
| add esp, 8 { release the temporary } | |
| end; | |
| function DoubleToD7B(const D: Double): Double7B; { Copies bytes 1..7 of D into the seven result bytes; the lowest byte of D is dropped without rounding } | |
| asm { in: D on the stack at [ebp + 8], eax = address of the result } | |
| mov edx, [ebp + 9] { bytes 1..4 of D } | |
| mov [eax], edx | |
| mov edx, [ebp + 12] { bytes 4..7 of D } | |
| mov [eax + 3], edx { result bytes 3..6; byte 3 is rewritten with the same value } | |
| end; | |
| procedure MakeWNormalsFromDVec(PsiLight: TPLNormals; PDVec: TPVec3D); { Scales the three doubles at PDVec by d32767 / length and stores them as three 16-bit integers at PsiLight } | |
| asm { in: eax = PsiLight, edx = PDVec; uses five FPU stack slots } | |
| fld qword [edx] | |
| fld st | |
| fmul st, st //x²,x | |
| fld qword [edx + 8] | |
| fld st | |
| fmul st, st //y²,y,x²,x | |
| faddp st(2), st //y,x²+y²,x | |
| fld qword [edx + 16] | |
| fld st | |
| fmul st, st //z²,z,y,x²+y²,x | |
| faddp st(3), st //z,y,x²+y²+z²,x | |
| fxch st(2) //x²+y²+z²,y,z,x | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| fsqrt | |
| fdivr d32767 { st(0) := d32767 / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //y',z',x' | |
| fistp word [eax + 2] { y component, converted with the current FPU rounding mode } | |
| fistp word [eax + 4] { z component } | |
| fistp word [eax] { x component } | |
| end; | |
| function FastIntPow(const base: Single; const expo: Integer): Single; //powers with expo = 2^x, x in [1..much], for spec painting; if interpolated, expo could be float! Computed by repeated squaring; a negative base returns 0 | |
| asm { in: base on the stack at [ebp + 8], eax = expo; result in st(0) } | |
| fld dword [ebp + 8] | |
| mov edx, eax { keep expo: fnstsw overwrites ax } | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when base < 0 or unordered } | |
| jnc @@1 | |
| fstp st | |
| fldz | |
| jmp @@end | |
| @@1: | |
| shr edx, 1 { drop the lowest bit of expo } | |
| @@2: | |
| fmul st, st { one squaring per remaining shift } | |
| shr edx, 1 | |
| jnz @@2 { the loop body runs at least once, so expo < 2 also returns base squared } | |
| @@end: | |
| end; | |
| function Clamp0D(const d: Double): Double; { Returns d, or 0 when d is negative; result in st(0) } | |
| asm { in: d on the stack at [ebp + 8] } | |
| fld qword [ebp + 8] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when d < 0 or unordered } | |
| jnc @@end | |
| fstp st | |
| fldz | |
| @@end: | |
| end; | |
| function Clamp01S(const sv: Single): Single; { Returns sv limited to the range 0..1; result in st(0) } | |
| asm { in: sv on the stack at [ebp + 8] } | |
| fld dword [ebp + 8] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when sv < 0 or unordered } | |
| jnc @@1 | |
| fstp st | |
| fldz | |
| jmp @@end | |
| @@1: | |
| fld1 | |
| fcomp st(1) { compare 1.0 with the value and pop the 1.0 } | |
| fnstsw ax | |
| shr ah, 1 { CF set means 1.0 < value } | |
| jnc @@end | |
| fstp st | |
| fld1 | |
| @@end: | |
| end; //ret 4 | |
| function Clamp01D(const dv: Double): Double; { Returns dv limited to the range 0..1; result in st(0) } | |
| asm { in: dv on the stack at [ebp + 8] } | |
| fld qword [ebp + 8] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when dv < 0 or unordered } | |
| jnc @@1 | |
| fstp st | |
| fldz | |
| jmp @@end | |
| @@1: | |
| fld1 | |
| fcomp st(1) { compare 1.0 with the value and pop the 1.0 } | |
| fnstsw ax | |
| shr ah, 1 { CF set means 1.0 < value } | |
| jnc @@end | |
| fstp st | |
| fld1 | |
| @@end: | |
| end; | |
| function MakeSplineCoeff(const xs: Double): TSVec; { Computes four single-precision weights from xs; Result[1] is formed as 1 minus the other three, so the weights sum to 1 } | |
| asm { in: xs on the stack at [ebp + 8], eax = address of Result } | |
| fld d1d6 | |
| fld qword [ebp + 8] | |
| fld st | |
| fmul st, st { xs^2 } | |
| fmul st, st(1) { xs^3 } | |
| fmul st, st(2) { xs^3 * d1d6 } | |
| fst dword [eax + 12] //Result[3],xs,d1d6 | |
| fld1 | |
| fsub st, st(2) //1-xs,Result[3],xs,d1d6 | |
| fmul st, st(2) | |
| fmul s05 | |
| fsubp st(3), st //Result[3],xs,d1d6 + 0.5 * xs * (xs - 1.0) | |
| fsub st(2), st //Result[3],xs,Result[0] | |
| fxch //xs,Result[3],Result[0] | |
| fadd st, st(2) | |
| fsub st, st(1) | |
| fsub st, st(1) //Result[2],Result[3],Result[0] with Result[2] = xs + Result[0] - 2 * Result[3] | |
| fst dword [eax + 8] | |
| fld1 | |
| fsubrp //1-Result[2],Result[3],Result[0] | |
| fsubrp //1-Result[2]-Result[3],Result[0] | |
| fsub st, st(1) //1-Result[2]-Result[3]-Result[0],Result[0] | |
| fstp dword [eax + 4] | |
| fstp dword [eax] | |
| end; | |
| function Add2SVecsWeight2(const sv1, sv2: TSVec; const w2: Single): TSVec; { Result[i] := sv1[i] + sv2[i] * w2 for i = 0..2; Result[3] := 0 } | |
| asm { in: eax = address of sv1, edx = address of sv2, ecx = address of Result, w2 at [ebp + 8] } | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fld dword [ebp + 8] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { stack, top first: sv2[2]*w2, sv2[1]*w2, sv2[0]*w2 } | |
| fadd dword [eax + 8] | |
| fstp dword [ecx + 8] | |
| fadd dword [eax + 4] | |
| fstp dword [ecx + 4] | |
| fadd dword [eax] | |
| fstp dword [ecx] | |
| xor eax, eax | |
| mov [ecx + 12], eax { fourth component cleared } | |
| end; | |
| function LinInterpolate2SVecs(const sv1, sv2: TSVec; const w1: Single): TSVec; { Result[i] := sv2[i] + (sv1[i] - sv2[i]) * w1 for all four components } | |
| asm { in: eax = address of sv1, edx = address of sv2, ecx = address of Result, w1 at [ebp + 8]; the FPU path fills all eight FPU registers } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm2, [ebp + 8] | |
| movups xmm0, [eax] | |
| movups xmm1, [edx] | |
| shufps xmm2, xmm2, 0 { broadcast w1 to all four lanes } | |
| subps xmm0, xmm1 | |
| mulps xmm0, xmm2 | |
| addps xmm0, xmm1 | |
| movups [ecx], xmm0 | |
| pop ebp { hand-written epilogue: restore the frame pointer saved by the entry code } | |
| ret 4 { and drop the 4-byte stack parameter } | |
| @@1: | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fld dword [edx + 12] | |
| fld dword [ebp + 8] | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] //s12,s11,s10,w1,s23,s22,s21,s20 | |
| fsub st, st(5) | |
| fmul st, st(3) | |
| faddp st(5), st //s11,s10,w1,s23,result2,s21,s20 | |
| fsub st, st(5) | |
| fmul st, st(2) | |
| faddp st(5), st //s10,w1,s23,result2,result1,s20 | |
| fsub st, st(5) | |
| fmul st, st(1) | |
| faddp st(5), st //w1,s23,result2,result1,result0 | |
| fld dword [eax + 12] | |
| fsub st, st(2) //..,w1,s23,result2,result1,result0 | |
| fmulp //..*w1,s23,result2,result1,result0 | |
| faddp { result3, result2, result1, result0 } | |
| fstp dword [ecx + 12] | |
| fstp dword [ecx + 8] | |
| fstp dword [ecx + 4] | |
| fstp dword [ecx] | |
| end; | |
| function Add2SVecsWeight(const sv1, sv2: TSVec; const w1, w2: Single): TSVec; { Result[i] := sv1[i] * w1 + sv2[i] * w2 for i = 0..2; the fourth component is multiplied by 0 (SSE path) or stored as 0 (FPU path) } | |
| asm { in: eax = address of sv1, edx = address of sv2, ecx = address of Result; [ebp + 12] scales sv1 and [ebp + 8] scales sv2 } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm2, [ebp + 12] | |
| movss xmm3, [ebp + 8] | |
| movups xmm0, [eax] | |
| movups xmm1, [edx] | |
| shufps xmm2, xmm2, $C0 { lanes 0..2 := weight; lane 3 keeps the zero left by movss } | |
| shufps xmm3, xmm3, $C0 | |
| mulps xmm0, xmm2 | |
| mulps xmm1, xmm3 | |
| addps xmm0, xmm1 | |
| movups [ecx], xmm0 | |
| pop ebp { hand-written epilogue: restore the frame pointer saved by the entry code } | |
| ret 8 { and drop the two 4-byte stack parameters } | |
| @@1: | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fld dword [ebp + 8] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld dword [ebp + 12] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //s12,s11,s10,s22,s21,s20 | |
| xor eax, eax { the sv1 pointer is no longer needed; eax becomes the zero for Result[3] } | |
| faddp st(3), st //s11,s10,result2,s21,s20 | |
| faddp st(3), st | |
| faddp st(3), st //result2,result1,result0 | |
| fstp dword [ecx + 8] | |
| fstp dword [ecx + 4] | |
| fstp dword [ecx] | |
| mov [ecx + 12], eax | |
| end; | |
| procedure ClearSVec(var sv: TSVec); | |
| asm { in: eax = @sv; sets all four dwords of sv to zero } | |
| xor ecx, ecx | |
| mov [eax + 12], ecx | |
| mov [eax + 8], ecx | |
| mov [eax + 4], ecx | |
| mov [eax], ecx | |
| end; | |
| procedure ClearDVec(var dv: TVec3D); { Sets the three doubles of dv to 0.0 } | |
| asm { in: eax = @dv } | |
| fldz | |
| fst qword [eax] | |
| fst qword [eax + 8] | |
| fstp qword [eax + 16] { the last store also pops the zero, leaving the FPU stack balanced } | |
| end; | |
| procedure mClampSqrtSVecV(v: TPSVec); { In place: v[i] := sqrt(max(v[i], 0)) for i = 0..2; v[3] := 0 } | |
| asm { in: eax = v; SSE path when SupportSSE is non-zero, FPU path otherwise } | |
| xor edx, edx | |
| mov [eax + 12], edx { clear the fourth component before either path runs } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movups xmm0, [eax] | |
| xorps xmm1, xmm1 | |
| maxps xmm0, xmm1 { negative lanes become 0 } | |
| sqrtps xmm0, xmm0 | |
| movups [eax], xmm0 | |
| ret { NOTE(review): bare ret assumes the compiler emits no entry code for this routine - confirm } | |
| @@1: | |
| mov edx, eax { keep the pointer: fnstsw overwrites ax } | |
| fld dword [edx] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when the value is negative or unordered } | |
| jnc @@2 | |
| fstp st | |
| fldz | |
| jmp @@21 | |
| @@2: | |
| fsqrt | |
| @@21: | |
| fstp dword [edx] | |
| fld dword [edx + 4] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@3 | |
| fstp st | |
| fldz | |
| jmp @@31 | |
| @@3: | |
| fsqrt | |
| @@31: | |
| fstp dword [edx + 4] | |
| fld dword [edx + 8] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@4 | |
| fstp st | |
| fldz | |
| jmp @@41 | |
| @@4: | |
| fsqrt | |
| @@41: | |
| fstp dword [edx + 8] | |
| end; | |
| procedure mClampSqrSVecV(v: TPSVec); { In place: v[i] := sqr(max(v[i], 0)) for i = 0..2; v[3] := 0 } | |
| asm { in: eax = v; SSE path when SupportSSE is non-zero, FPU path otherwise } | |
| xor edx, edx | |
| mov [eax + 12], edx { clear the fourth component before either path runs } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movups xmm0, [eax] | |
| xorps xmm1, xmm1 | |
| maxps xmm0, xmm1 { negative lanes become 0 } | |
| mulps xmm0, xmm0 | |
| movups [eax], xmm0 | |
| ret { NOTE(review): bare ret assumes the compiler emits no entry code for this routine - confirm } | |
| @@1: | |
| mov edx, eax { keep the pointer: fnstsw overwrites ax } | |
| fld dword [edx] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when the value is negative or unordered } | |
| jnc @@2 | |
| fstp st | |
| fldz | |
| @@2: | |
| fmul st, st { squares either the value or the substituted zero } | |
| fstp dword [edx] | |
| fld dword [edx + 4] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@3 | |
| fstp st | |
| fldz | |
| @@3: | |
| fmul st, st | |
| fstp dword [edx + 4] | |
| fld dword [edx + 8] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@4 | |
| fstp st | |
| fldz | |
| @@4: | |
| fmul st, st | |
| fstp dword [edx + 8] | |
| end; | |
| function FastPow(const x, y: Single): Single; //used by vis light 3; evaluates the rational expression below, result in st(0) | |
| asm // Result := x / (y - x * y + x); | |
| fld dword [ebp+12] { x } | |
| fld st { x, x } | |
| fmul dword [ebp+8] { x * y, x } | |
| fsubr dword [ebp+8] { y - x * y, x } | |
| fadd st, st(1) { y - x * y + x, x } | |
| fdivp { x / (y - x * y + x) } | |
| end; | |
| function MakeSVecFromNormalsD(PsiLight: Pointer): TSVec; { Converts three signed 16-bit integers at PsiLight into singles scaled by d3; Result[3] := 0 } | |
| const d3: Double = 3.0518509476e-5; { approximately 1 / 32767 } | |
| asm { in: eax = PsiLight, edx = address of Result } | |
| fild word [eax] | |
| fild word [eax + 2] | |
| fild word [eax + 4] | |
| fld d3 | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { stack, top first: scaled third, second, first value } | |
| xor eax, eax { the source pointer is no longer needed; eax becomes the zero for Result[3] } | |
| fstp dword [edx + 8] | |
| fstp dword [edx + 4] | |
| fstp dword [edx] | |
| mov [edx + 12], eax | |
| end; | |
| function MinMaxSVecSSE(const smin, smax: Single; const V1: TSVec): TSVec; { Result[i] := min(max(V1[i], smin), smax) for all four components; uses SSE without checking SupportSSE } | |
| asm { in: eax = address of V1, edx = address of Result; [ebp + 12] is the lower bound, [ebp + 8] the upper bound } | |
| movss xmm1, [ebp + 12] | |
| movss xmm2, [ebp + 8] | |
| movups xmm0, [eax] | |
| shufps xmm1, xmm1, 0 { broadcast the lower bound } | |
| shufps xmm2, xmm2, 0 { broadcast the upper bound } | |
| maxps xmm0, xmm1 | |
| minps xmm0, xmm2 | |
| movups [edx], xmm0 | |
| end; | |
| function mSqrtSVec(const V1: TSVec): TSVec; { Result[i] := sqrt(V1[i]) for i = 0..2. NOTE(review): the SSE path stores sqrt(V1[3]) in Result[3] while the FPU path stores 0 - confirm which is intended } | |
| asm { in: eax = address of V1, edx = address of Result } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movups xmm0, [eax]; | |
| sqrtps xmm0, xmm0; | |
| movups [edx], xmm0; | |
| ret { NOTE(review): bare ret assumes the compiler emits no entry code for this routine - confirm } | |
| @@1: | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fsqrt | |
| xor eax, eax { the source pointer is no longer needed; eax becomes the zero for Result[3] } | |
| fstp dword [edx + 8] | |
| fsqrt | |
| fstp dword [edx + 4] | |
| fsqrt | |
| fstp dword [edx] | |
| mov [edx + 12], eax | |
| end; | |
| function LengthOfVec(const V: TVec3D): Double; { Euclidean length of the three doubles at V; result in st(0) } | |
| asm // Result := Sqrt(Sqr(V[0]) + Sqr(V[1]) + Sqr(V[2])); in: eax = address of V | |
| fld qword [eax] | |
| fmul st, st | |
| fld qword [eax+8] | |
| fmul st, st | |
| faddp | |
| fld qword [eax+16] | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| end; | |
| function SqrLengthOfVec(const V: TVec3D): Double; { Sum of the squares of the three doubles at V; result in st(0) } | |
| asm { in: eax = address of V } | |
| fld qword [eax] | |
| fmul st, st | |
| fld qword [eax+8] | |
| fmul st, st | |
| faddp | |
| fld qword [eax+16] | |
| fmul st, st | |
| faddp | |
| end; | |
| function SqrLengthOfSVec(const V: TSVec): Single; { Sum of the squares of the first three singles at V; the fourth is ignored } | |
| asm //in: eax = address of V, out: st(0). Result := Sqr(V[0]) + Sqr(V[1]) + Sqr(V[2]); | |
| fld dword [eax] | |
| fmul st, st | |
| fld dword [eax + 4] | |
| fmul st, st | |
| faddp | |
| fld dword [eax + 8] | |
| fmul st, st | |
| faddp | |
| end; | |
| function NormaliseVector(V: TPVec3D): TVec3D; { Result := V / sqrt(V[0]² + V[1]² + V[2]² + d1em100) } | |
| asm //max 4 st slots useable because of calling formula; in: eax = V, edx = address of Result | |
| fld qword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld qword [eax + 8] | |
| fld st //v1,v1,vo²,vo | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld qword [eax + 16] | |
| fmul st, st //v2²,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(2), st //v1,v0²+v1²+v2²,v0 | |
| fxch //v0²+v1²+v2²,v1,v0 | |
| fsqrt //r,v1,v0 | |
| fld1 //1,r,v1,v0 | |
| fdivrp //1/r,v1,v0 | |
| fmul st(2), st | |
| fmul st(1), st | |
| fmul qword [eax + 16] //v2',v1',v0' (V[2] is reloaded from memory to stay within four FPU slots) | |
| fstp qword [edx + 16] | |
| fstp qword [edx + 8] | |
| fstp qword [edx] // | |
| end; | |
| procedure NormaliseVectorVar(var V: TVec3D); { In place: V := V / sqrt(V[0]² + V[1]² + V[2]² + d1em100) } | |
| asm { in: eax = @V; uses five FPU stack slots } | |
| fld qword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld qword [eax + 8] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld qword [eax + 16] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld1 | |
| fdivrp { 1 / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fstp qword [eax + 8] | |
| fstp qword [eax + 16] | |
| fstp qword [eax] //} | |
| end; | |
| procedure NormaliseSVectorVar(var V: TSVec); { In place: V[i] := V[i] / sqrt(V[0]² + V[1]² + V[2]² + s1em30) for i = 0..2; V[3] is not touched } | |
| asm { in: eax = @V; uses five FPU stack slots } | |
| fld dword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld dword [eax + 4] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld dword [eax + 8] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd s1em30 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld1 | |
| fdivrp { 1 / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fstp dword [eax + 4] | |
| fstp dword [eax + 8] | |
| fstp dword [eax] | |
| end; | |
| function NormaliseVectorTo(const n: Double; const V: TVec3D): TVec3D; overload; { Result := V * n / sqrt(V[0]² + V[1]² + V[2]² + d1em100) } | |
| asm { in: n on the stack at [ebp + 8], eax = address of V, edx = address of Result; uses five FPU stack slots } | |
| fld qword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld qword [eax + 8] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld qword [eax + 16] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld qword [ebp + 8] | |
| fdivrp { n / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fstp qword [edx + 8] | |
| fstp qword [edx + 16] | |
| fstp qword [edx] | |
| end; | |
| procedure NormaliseVectorTo(const n: Double; V: TPVec3D); overload; { In place: V := V * n / sqrt(V[0]² + V[1]² + V[2]² + d1em100) } | |
| asm { in: n on the stack at [ebp + 8], eax = V; uses five FPU stack slots } | |
| fld qword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld qword [eax + 8] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld qword [eax + 16] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld qword [ebp + 8] | |
| fdivrp { n / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fstp qword [eax + 8] | |
| fstp qword [eax + 16] | |
| fstp qword [eax] | |
| end; | |
| function NormaliseSVector(const V: TSVec): TSVec; //..in SSE (original note). Result[i] := V[i] / sqrt(V[0]² + V[1]² + V[2]² + d1em100) for i = 0..2; NOTE(review): Result[3] is never written here - confirm callers do not read it | |
| asm { in: eax = address of V, edx = address of Result; uses five FPU stack slots } | |
| fld dword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld dword [eax + 4] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld dword [eax + 8] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld1 | |
| fdivrp { 1 / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fstp dword [edx + 4] | |
| fstp dword [edx + 8] | |
| fstp dword [edx] | |
| end; | |
| procedure SVecToNormals(const sv: TSVec; pn: Pointer); { Scales the first three singles of sv to length 32767 and stores them as three 16-bit integers at pn } | |
| const d32767: Double = 32767; | |
| asm { in: eax = address of sv, edx = pn; uses five FPU stack slots } | |
| fld dword [eax] | |
| fld st //v0,v0 | |
| fmul st, st //v0²,v0 | |
| fld dword [eax + 4] | |
| fld st | |
| fmul st, st //v1²,v1,v0²,v0 | |
| faddp st(2), st //v1,v0²+v1²,v0 | |
| fld dword [eax + 8] | |
| fld st //v2,v2,v1,v0²+v1²,v0 | |
| fmul st, st //v2²,v2,v1,v0²+v1²,v0 | |
| fadd d1em100 { presumably a tiny bias that keeps the divisor non-zero - confirm the constant } | |
| faddp st(3), st //v2,v1,v0²+v1²+v2²,v0 | |
| fxch st(2) //v0²+v1²+v2²,v1,v2,v0 | |
| fsqrt | |
| fld d32767 | |
| fdivrp { 32767 / length } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //v1',v2',v0' | |
| fistp word [edx + 2] { converted with the current FPU rounding mode } | |
| fistp word [edx + 4] | |
| fistp word [edx + 0] | |
| end; | |
| procedure RotateVector(V: TPVec3D; M: TPMatrix3); //is like reversed S version. In place: V[i] := M[i,0]*V[0] + M[i,1]*V[1] + M[i,2]*V[2], where M[i,j] is the double at byte offset 24*i + 8*j | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld qword [edx] | |
| fld qword [edx + 24] | |
| fld qword [edx + 48] | |
| fld qword [eax] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { three partial sums, top first: for V[2], V[1], V[0] } | |
| fld qword [edx + 8] | |
| fld qword [edx + 32] | |
| fld qword [edx + 56] | |
| fld qword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[1] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld qword [edx + 16] | |
| fld qword [edx + 40] | |
| fld qword [edx + 64] | |
| fld qword [eax + 16] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[2] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp qword [eax + 16] { all inputs were read before the first store } | |
| fstp qword [eax + 8] | |
| fstp qword [eax] | |
| end; | |
| procedure RotateVectorReverse(V: TPVec3D; M: TPMatrix3); { In place: V[i] := M[0,i]*V[0] + M[1,i]*V[1] + M[2,i]*V[2], where M[i,j] is the double at byte offset 24*i + 8*j } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld qword [edx] | |
| fld qword [edx + 8] | |
| fld qword [edx + 16] | |
| fld qword [eax] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { three partial sums, top first: for V[2], V[1], V[0] } | |
| fld qword [edx + 24] | |
| fld qword [edx + 32] | |
| fld qword [edx + 40] | |
| fld qword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[1] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld qword [edx + 48] | |
| fld qword [edx + 56] | |
| fld qword [edx + 64] | |
| fld qword [eax + 16] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[2] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp qword [eax + 16] { all inputs were read before the first store } | |
| fstp qword [eax + 8] | |
| fstp qword [eax] | |
| end; | |
| procedure RotateSVector(V: TPSVec; M: TPMatrix3); { In place, singles in V and doubles in M: V[i] := M[0,i]*V[0] + M[1,i]*V[1] + M[2,i]*V[2] for i = 0..2, where M[i,j] is at byte offset 24*i + 8*j; V[3] is not touched } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld qword [edx] | |
| fld qword [edx + 8] | |
| fld qword [edx + 16] | |
| fld dword [eax] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { three partial sums, top first: for V[2], V[1], V[0] } | |
| fld qword [edx + 24] | |
| fld qword [edx + 32] | |
| fld qword [edx + 40] | |
| fld dword [eax + 4] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[1] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld qword [edx + 48] | |
| fld qword [edx + 56] | |
| fld qword [edx + 64] | |
| fld dword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[2] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp dword [eax + 8] { results are rounded to single on store } | |
| fstp dword [eax + 4] | |
| fstp dword [eax] | |
| end; | |
| procedure RotateSVectorReverse(V: TPSVec; M: TPMatrix3); { In place, singles in V and doubles in M: V[i] := M[i,0]*V[0] + M[i,1]*V[1] + M[i,2]*V[2] for i = 0..2, where M[i,j] is at byte offset 24*i + 8*j; V[3] is not touched } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld qword [edx] | |
| fld qword [edx + 24] | |
| fld qword [edx + 48] | |
| fld dword [eax] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { three partial sums, top first: for V[2], V[1], V[0] } | |
| fld qword [edx + 8] | |
| fld qword [edx + 32] | |
| fld qword [edx + 56] | |
| fld dword [eax + 4] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[1] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld qword [edx + 16] | |
| fld qword [edx + 40] | |
| fld qword [edx + 64] | |
| fld dword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[2] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp dword [eax + 8] { results are rounded to single on store } | |
| fstp dword [eax + 4] | |
| fstp dword [eax] | |
| end; | |
| procedure RotateSVectorS(V: TPSVec; M: TPSMatrix3); //in calcpixelcol. In place: V[j] := M[0,j]*V[0] + M[1,j]*V[1] + M[2,j]*V[2], where M[i,j] is the single at byte offset 16*i + 4*j; the SSE path also rewrites V[3] from the fourth lane of each row, the FPU path leaves V[3] alone | |
| asm // eax edx | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm0, [eax] | |
| movss xmm1, [eax + 4] | |
| movss xmm2, [eax + 8] | |
| shufps xmm0, xmm0, 0 { broadcast V[0] } | |
| shufps xmm1, xmm1, 0 { broadcast V[1] } | |
| shufps xmm2, xmm2, 0 { broadcast V[2] } | |
| movups xmm4, [edx] | |
| movups xmm5, [edx + 16] | |
| movups xmm6, [edx + 32] | |
| mulps xmm4, xmm0 //m0*v0 | |
| mulps xmm5, xmm1 //m1*v1 | |
| mulps xmm6, xmm2 //m2*v2 | |
| addps xmm4, xmm5 | |
| addps xmm4, xmm6 | |
| movups [eax], xmm4 | |
| ret { NOTE(review): bare ret assumes the compiler emits no entry code for this routine - confirm } | |
| @@1: | |
| fld dword [edx] //M[0,0] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] //M[0,2],M[0,1],M[0,0] | |
| fld dword [eax] //V[0],M[0,2],M[0,1],M[0,0] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| fld dword [edx + 16] | |
| fld dword [edx + 20] | |
| fld dword [edx + 24] | |
| fld dword [eax + 4] //v[1],M[1,2],M[1,1],M[1,0], M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //M[1,2]*V[1],M[1,1]*V[1],M[1,0]*V[1], M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) //M[1,2]*V[1]+M[0,2]*V[0], M[1,1]*V[1]+M[0,1]*V[0], M[1,0]*V[1]+M[0,0]*V[0] | |
| fld dword [edx + 32] | |
| fld dword [edx + 36] | |
| fld dword [edx + 40] | |
| fld dword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp dword [eax + 8] //v2=m02*v0+m12*v1+m22*v2 | |
| fstp dword [eax + 4] //v1=m01*v0+m11*v1+m21*v2 | |
| fstp dword [eax] //v0=m00*v0+m10*v1+m20*v2 | |
| end; | |
| procedure RotateVectorS(V: TPVec3D; M: TPSMatrix3); { In place, doubles in V and singles in M: V[j] := M[0,j]*V[0] + M[1,j]*V[1] + M[2,j]*V[2], where M[i,j] is at byte offset 16*i + 4*j } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld dword [edx] //M[0,0] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] //M[0,2],M[0,1],M[0,0] | |
| fld qword [eax] //V[0],M[0,2],M[0,1],M[0,0] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| fld dword [edx + 16] | |
| fld dword [edx + 20] | |
| fld dword [edx + 24] | |
| fld qword [eax + 8] //v[1],M[1,2],M[1,1],M[1,0], M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //M[1,2]*V[1],M[1,1]*V[1],M[1,0]*V[1], M[0,2]*V[0],M[0,1]*V[0],M[0,0]*V[0] | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) //M[1,2]*V[1]+M[0,2]*V[0], M[1,1]*V[1]+M[0,1]*V[0], M[1,0]*V[1]+M[0,0]*V[0] | |
| fld dword [edx + 32] | |
| fld dword [edx + 36] | |
| fld dword [edx + 40] | |
| fld qword [eax + 16] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp qword [eax + 16] //v2=m02*v0+m12*v1+m22*v2 | |
| fstp qword [eax + 8] //v1=m01*v0+m11*v1+m21*v2 | |
| fstp qword [eax] //v0=m00*v0+m10*v1+m20*v2 | |
| end; | |
| procedure RotateVectorReverseS(V: TPVec3D; M: TPSMatrix3); { In place, doubles in V and singles in M: V[i] := M[i,0]*V[0] + M[i,1]*V[1] + M[i,2]*V[2], where M[i,j] is at byte offset 16*i + 4*j } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld dword [edx] | |
| fld dword [edx + 16] | |
| fld dword [edx + 32] | |
| fld qword [eax] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { three partial sums, top first: for V[2], V[1], V[0] } | |
| fld dword [edx + 4] | |
| fld dword [edx + 20] | |
| fld dword [edx + 36] | |
| fld qword [eax + 8] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[1] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld dword [edx + 8] | |
| fld dword [edx + 24] | |
| fld dword [edx + 40] | |
| fld qword [eax + 16] | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) | |
| faddp st(3), st(0) { fold the V[2] products into the partial sums } | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp qword [eax + 16] { all inputs were read before the first store } | |
| fstp qword [eax + 8] | |
| fstp qword [eax] | |
| end; | |
| procedure RotateSVectorReverseS(V: TPSVec; M: TPSMatrix3); { In place, singles in V and M: V[i] := M[i,0]*V[0] + M[i,1]*V[1] + M[i,2]*V[2] for i = 0..2, where M[i,j] is at byte offset 16*i + 4*j; V[3] is not touched } | |
| asm { in: eax = V, edx = M; uses up to seven FPU stack slots } | |
| fld dword [edx] | |
| fld dword [edx + 16] | |
| fld dword [edx + 32] //M[2,0], M[1,0], M[0,0] | |
| fld dword [eax] //V[0] | |
| fmul st(1), st(0) //V[0], V[0]*M[2,0], M[1,0], M[0,0] | |
| fmul st(2), st(0) //V[0], V[0]*M[2,0], V[0]*M[1,0], M[0,0] | |
| fmulp st(3), st(0) //V[0]*M[2,0], V[0]*M[1,0], V[0]*M[0,0] | |
| fld dword [edx + 4] | |
| fld dword [edx + 20] | |
| fld dword [edx + 36] | |
| fld dword [eax + 4] | |
| fmul st(1), st(0) //+v[1]*M[x,1] | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //V[1]*M[2,1], V[1]*M[1,1], V[1]*M[0,1], V[0]*M[2,0], V[0]*M[1,0], V[0]*M[0,0] | |
| faddp st(3), st(0) //v0*m20+v1*m21 | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fld dword [edx + 8] | |
| fld dword [edx + 24] | |
| fld dword [edx + 40] | |
| fld dword [eax + 8] | |
| fmul st(1), st(0) //+v[2]*M[x,2] | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) //v0*m20+v1*m21+v2*m22 | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| faddp st(3), st(0) | |
| fstp dword [eax + 8] //v0*m20+v1*m21+v2*m22 | |
| fstp dword [eax + 4] //v0*m10+v1*m11+v2*m12 | |
| fstp dword [eax] //v0*m00+v1*m01+v2*m02 | |
| end; | |
| function AddSVectors(const V1, V2: TSVec): TSVec; overload; { Result[i] := V1[i] + V2[i] for i = 0..2; Result[3] := 0 } | |
| asm { in: eax = address of V1, edx = address of V2, ecx = address of Result } | |
| fld dword [eax] | |
| fadd dword [edx] | |
| fstp dword [ecx] | |
| fld dword [eax + 4] | |
| fadd dword [edx + 4] | |
| fstp dword [ecx + 4] | |
| fld dword [eax + 8] | |
| fadd dword [edx + 8] | |
| fstp dword [ecx + 8] | |
| xor eax, eax | |
| mov [ecx + 12], eax { fourth component cleared } | |
| end; | |
| procedure AddSVectors(V1: TPSVec; const V2: TSVec); overload; { In place: V1[i] := V1[i] + V2[i] for i = 0..2; V1[3] is not touched } | |
| asm { in: eax = V1, edx = address of V2 } | |
| fld dword [eax] | |
| fadd dword [edx] | |
| fstp dword [eax] | |
| fld dword [eax + 4] | |
| fadd dword [edx + 4] | |
| fstp dword [eax + 4] | |
| fld dword [eax + 8] | |
| fadd dword [edx + 8] | |
| fstp dword [eax + 8] | |
| end; | |
| function MakeSVecMultiplierFromDynFogCol(sv: TSVec): TSVec; //not used. Result[i] := 1 - sv[i] * s1d255, limited to the range 0..1, for i = 0..2; Result[3] := 0 | |
| asm { eax is dereferenced as the address of sv, edx as the address of Result } | |
| cmp SupportSSE2, 0 { both outcomes continue at @@1: there is no separate SSE2 path } | |
| jz @@1 | |
| @@1: | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld s1d255 | |
| fmul st(3), st(0) | |
| fmul st(2), st(0) | |
| fmulp | |
| fld1 | |
| fsubr st(3), st(0) { each scaled component becomes 1 - component } | |
| fsubr st(2), st(0) | |
| fsubr st(1), st(0) | |
| fxch st(3) //vs0,vs2,vs1,1 | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { CF := C0, set when the value is negative or unordered } | |
| jnc @up1 | |
| fstp st(0) | |
| fldz | |
| @up1: | |
| fcom st(3) { compare with the 1.0 kept at the bottom of the stack } | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @skip1 { below 1: keep the value } | |
| fstp st(0) | |
| fld1 | |
| @skip1: | |
| fstp dword [edx] | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @up2 | |
| fstp st(0) | |
| fldz | |
| @up2: | |
| fcom st(2) | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @skip2 | |
| fstp st(0) | |
| fld1 | |
| @skip2: | |
| fstp dword [edx + 8] { the third component is stored second because of the fxch above } | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @up3 | |
| fstp st(0) | |
| fldz | |
| @up3: | |
| fcom st(1) | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @skip3 | |
| fstp st(0) | |
| fld1 | |
| @skip3: | |
| fstp dword [edx + 4] | |
| fstp st(0) { discard the remaining 1.0 } | |
| xor eax, eax | |
| mov dword [edx + 12], eax | |
| end; | |
| procedure AddSVecWeightS(V1, V2: TPSVec; const W: Single); overload; { In place: V1 := V1 + V2 * W; the SSE path updates all four components, the FPU path only the first three } | |
| asm { in: eax = V1, edx = V2, W on the stack at [ebp + 8] } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm2, [ebp + 8] | |
| movups xmm0, [edx] | |
| shufps xmm2, xmm2, 0 { broadcast W to all four lanes } | |
| movups xmm1, [eax] | |
| mulps xmm0, xmm2 | |
| addps xmm0, xmm1 | |
| movups [eax], xmm0 | |
| pop ebp { hand-written epilogue: restore the frame pointer saved by the entry code } | |
| ret 4 { and drop the 4-byte stack parameter } | |
| @@1: | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fld dword [ebp + 8] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { stack, top first: V2[2]*W, V2[1]*W, V2[0]*W } | |
| fadd dword [eax + 8] | |
| fstp dword [eax + 8] | |
| fadd dword [eax + 4] | |
| fstp dword [eax + 4] | |
| fadd dword [eax] | |
| fstp dword [eax] | |
| end; //ret 4 | |
| procedure AddSVecWeightS(var V1: TSVec; const V2: TSVec; const W: Single); overload; { In place: V1 := V1 + V2 * W; the SSE path updates all four components, the FPU path only the first three } | |
| asm { in: eax = @V1, edx = address of V2, W on the stack at [ebp + 8] } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm2, [ebp + 8] | |
| movups xmm0, [edx] | |
| shufps xmm2, xmm2, 0 { broadcast W to all four lanes } | |
| movups xmm1, [eax] | |
| mulps xmm0, xmm2 | |
| addps xmm0, xmm1 | |
| movups [eax], xmm0 | |
| pop ebp { hand-written epilogue: restore the frame pointer saved by the entry code } | |
| ret 4 { and drop the 4-byte stack parameter } | |
| @@1: | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fld dword [ebp + 8] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { stack, top first: V2[2]*W, V2[1]*W, V2[0]*W } | |
| fadd dword [eax + 8] | |
| fstp dword [eax + 8] | |
| fadd dword [eax + 4] | |
| fstp dword [eax + 4] | |
| fadd dword [eax] | |
| fstp dword [eax] | |
| end; | |
| function DotOfSVectors(const V1, V2: TSVec): Single; { Returns V1[0]*V2[0] + V1[1]*V2[1] + V1[2]*V2[2] in st(0); the fourth components are ignored } | |
| asm { in: eax = address of V1, edx = address of V2 } | |
| fld dword [eax] | |
| fmul dword [edx] | |
| fld dword [eax + 4] | |
| fmul dword [edx + 4] | |
| faddp | |
| fld dword [eax + 8] | |
| fmul dword [edx + 8] | |
| faddp | |
| end; | |
| function SubtractVectors2s(const V1, V2: TVec3D): TSVec; { Result[i] := V1[i] - V2[i] for i = 0..2, computed from doubles and stored as singles; Result[3] := 0 } | |
| asm { in: eax = address of V1, edx = address of V2, ecx = address of Result } | |
| fld qword [eax] | |
| fsub qword [edx] | |
| fstp dword [ecx] | |
| fld qword [eax + 8] | |
| fsub qword [edx + 8] | |
| fstp dword [ecx + 4] | |
| fld qword [eax + 16] | |
| fsub qword [edx + 16] | |
| fstp dword [ecx + 8] | |
| xor eax, eax | |
| mov [ecx + 12], eax { fourth component cleared } | |
| end; | |
| function SubtractVectors(const V1, V2: TVec3D): TVec3D; overload; { Result := V1 - V2 on three doubles; eax = @V1, edx = @V2, ecx = @Result } | |
| asm | |
| fld qword [eax] | |
| fsub qword [edx] | |
| fstp qword [ecx] { double at offset 0 } | |
| fld qword [eax + 8] | |
| fsub qword [edx + 8] | |
| fstp qword [ecx + 8] { double at offset 8 } | |
| fld qword [eax + 16] | |
| fsub qword [edx + 16] | |
| fstp qword [ecx + 16] { double at offset 16 } | |
| end; | |
| // eax edx ecx | |
| function SubtractVectors(V1: TPVec3D; const V2: TVec3D): TVec3D; overload; { Result := V1^ - V2 on three doubles; eax = V1, edx = @V2, ecx = @Result } | |
| asm | |
| fld qword [eax] | |
| fsub qword [edx] | |
| fstp qword [ecx] { double at offset 0 } | |
| fld qword [eax + 8] | |
| fsub qword [edx + 8] | |
| fstp qword [ecx + 8] { double at offset 8 } | |
| fld qword [eax + 16] | |
| fsub qword [edx + 16] | |
| fstp qword [ecx + 16] { double at offset 16 } | |
| end; | |
| function SubtractVectors(const V1: TVec3D; V2: TPVec3D): TVec3D; overload; { Result := V1 - V2^ on three doubles; eax = @V1, edx = V2, ecx = @Result } | |
| asm | |
| fld qword [eax] | |
| fsub qword [edx] | |
| fstp qword [ecx] { double at offset 0 } | |
| fld qword [eax + 8] | |
| fsub qword [edx + 8] | |
| fstp qword [ecx + 8] { double at offset 8 } | |
| fld qword [eax + 16] | |
| fsub qword [edx + 16] | |
| fstp qword [ecx + 16] { double at offset 16 } | |
| end; | |
| function SubtractSVectors(V1: TPSVec; const V2: TSVec): TSVec; { Result := V1^ - V2 on three singles; eax = V1, edx = @V2, ecx = @Result } | |
| asm | |
| fld dword [eax] | |
| fsub dword [edx] | |
| fstp dword [ecx] | |
| fld dword [eax + 4] | |
| fsub dword [edx + 4] | |
| fstp dword [ecx + 4] | |
| fld dword [eax + 8] | |
| fsub dword [edx + 8] | |
| fstp dword [ecx + 8] | |
| xor eax, eax | |
| mov [ecx + 12], eax { fourth single of Result is set to 0 } | |
| end; | |
| function AddSVecS(const V1: TSVec; const s: Single): TSVec; { Result := V1 + s on the first three singles; eax = @V1, edx = @Result, s on the stack } | |
| asm | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld dword [esp + 8] { s; this offset presumes saved ebp and return address lie below it and esp is untouched } | |
| fadd st(3), st | |
| fadd st(2), st | |
| faddp { st1 := st1 + s, s is popped } | |
| fstp dword [edx + 8] | |
| fstp dword [edx + 4] | |
| fstp dword [edx] | |
| xor eax, eax | |
| mov [edx + 12], eax { fourth single of Result is set to 0 } | |
| end; | |
| procedure ScaleSVectorV(V1: TPSVec; const s: Single); { In place: first three singles of V1^ are multiplied by s; eax = V1, s on the stack } | |
| asm | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld dword [esp + 8] { s; this offset presumes saved ebp and return address lie below it and esp is untouched } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { st1 := st1 * s, s is popped } | |
| fstp dword [eax + 8] | |
| fstp dword [eax + 4] | |
| fstp dword [eax] | |
| end; | |
| function MultiplySVectors(const V1, V2: TSVec): TSVec; { Component-wise product of the first three singles; eax = @V1, edx = @V2, ecx = @Result } | |
| asm | |
| fld dword [eax + 8] | |
| fld dword [eax + 4] | |
| fld dword [eax] { all three V1 singles are loaded before anything is stored } | |
| fmul dword [edx] | |
| fstp dword [ecx] | |
| fmul dword [edx + 4] | |
| fstp dword [ecx + 4] | |
| fmul dword [edx + 8] | |
| fstp dword [ecx + 8] | |
| xor eax, eax | |
| mov [ecx + 12], eax { fourth single of Result is set to 0 } | |
| end; | |
| procedure MultiplySVectorsV(V1, V2: TPSVec); overload; { In place: V1^ := V1^ * V2^ component-wise on the first three singles; eax = V1, edx = V2 } | |
| asm | |
| fld dword [eax + 8] | |
| fld dword [eax + 4] | |
| fld dword [eax] { all three V1^ singles are loaded before anything is stored } | |
| fmul dword [edx] | |
| fstp dword [eax] | |
| fmul dword [edx + 4] | |
| fstp dword [eax + 4] | |
| fmul dword [edx + 8] | |
| fstp dword [eax + 8] { the single at offset 12 is left untouched } | |
| end; | |
| procedure MultiplySVectorsV(V1: TPSVec; const V2: TSVec); overload; { In place: V1^ := V1^ * V2 component-wise on the first three singles; eax = V1, edx = @V2 } | |
| asm | |
| fld dword [eax + 8] | |
| fld dword [eax + 4] | |
| fld dword [eax] { all three V1^ singles are loaded before anything is stored } | |
| fmul dword [edx] | |
| fstp dword [eax] | |
| fmul dword [edx + 4] | |
| fstp dword [eax + 4] | |
| fmul dword [edx + 8] | |
| fstp dword [eax + 8] { the single at offset 12 is left untouched } | |
| end; | |
| function ScaleSVector(const V1: TSVec; const s: Single): TSVec; { Result := V1 * s on the first three singles; eax = @V1, edx = @Result, s on the stack } | |
| asm | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld dword [esp + 8] { s; this offset presumes saved ebp and return address lie below it and esp is untouched } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { st1 := st1 * s, s is popped } | |
| fstp dword [edx + 8] | |
| fstp dword [edx + 4] | |
| fstp dword [edx] { NOTE: the fourth single of Result is never written here } | |
| end; | |
| function ScaleSVector4(const V1: TSVec; const s: Single): TSVec; { Result := V1 * s on all four singles; eax = @V1, edx = @Result, s on the stack } | |
| asm | |
| cmp SupportSSE, 0 { SupportSSE = 0: fall back to the x87 path at @1 } | |
| jz @1 | |
| movss xmm1, [esp + 8] { s; offset presumes saved ebp and return address lie below it } | |
| movups xmm0, [eax] { four singles read from V1 } | |
| shufps xmm1, xmm1, 0 { broadcast s into all four lanes } | |
| mulps xmm0, xmm1 | |
| movups [edx], xmm0 { fixed: store into Result (edx) instead of overwriting the const input V1 } | |
| pop ebp { fixed: undo the entry frame before the manual return, as the sibling SSE routines do } | |
| ret 4 { return and drop the 4-byte s argument } | |
| @1: fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld dword [eax + 12] | |
| fld dword [esp + 8] { st0 = s, st1..st4 = V1 singles at +12, +8, +4, +0 } | |
| fmul st(4), st | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { st1 := st1 * s, s is popped } | |
| fstp dword [edx + 12] | |
| fstp dword [edx + 8] | |
| fstp dword [edx + 4] | |
| fstp dword [edx] | |
| end; | |
| function ScaleSVectorD(V1: TPSVec; const d: Double): TSVec; { Result := V1^ * d on the first three singles; eax = V1, edx = @Result, d (8 bytes) on the stack } | |
| asm | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] | |
| fld qword [esp + 8] { d; this offset presumes saved ebp and return address lie below it and esp is untouched } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { st1 := st1 * d, d is popped } | |
| fstp dword [edx + 8] | |
| fstp dword [edx + 4] | |
| fstp dword [edx] | |
| xor eax, eax | |
| mov [edx + 12], eax { fourth single of Result is set to 0 } | |
| end; | |
| procedure BuildViewVectorDFOV(var xa, ya: Double; v: TPVec3D); { Writes normalize(-sin(ya), sin(xa), cos(xa)*cos(ya)) to v^ as doubles; eax = @xa, edx = @ya, ecx = v } | |
| asm // -sinY, sinX, cosX*cosY ...pano: sinX*cosY, sinY, -cosX*cosY | |
| fld qword [eax] | |
| fsincos //cosX,sinX | |
| fld qword [edx] | |
| fsincos //cosY,sinY,cosX,sinX | |
| fmulp st(2), st(0) //sinY,cosX*cosY,sinX | |
| fchs { st0 = -sinY } | |
| fld st(0) //normalize | |
| fmul st(0), st(1) { sinY^2 } | |
| fld st(2) | |
| fmul st(0), st(3) { (cosX*cosY)^2 } | |
| faddp | |
| fld st(3) | |
| fmul st(0), st(4) { sinX^2 } | |
| faddp { st0 = squared length of the three values } | |
| fsqrt | |
| fld1 | |
| fdivrp { st0 = 1 / length } | |
| fmul st(3), st(0) | |
| fmul st(2), st(0) | |
| fmulp { all three values scaled by 1 / length, scale factor popped } | |
| fstp qword [ecx] //cosX*cosY,sinX | |
| fstp qword [ecx + 16] //sinX | |
| fstp qword [ecx + 8] | |
| end; | |
| procedure BuildViewVectorDSphereFOV(var xa, ya: Double; v: TPVec3D); { Writes (-sin(ya)*cos(xa), sin(xa), cos(xa)*cos(ya)) to v^ as doubles, no normalization; eax = @xa, edx = @ya, ecx = v } | |
| asm //x<->y | |
| fld qword [edx] | |
| fsincos //cosY,sinY | |
| fld qword [eax] | |
| fsincos //cosX,sinX,cosY,sinY | |
| fmul st(2), st(0) //cosX,sinX,cosX*cosY,sinY // pano: sinX*cosY, sinY, cosX*cosY | |
| fmulp st(3), st(0) //sinX,cosX*cosY,sinY*cosX | |
| fstp qword [ecx + 8] //cosX*cosY,sinX*cosY | |
| fstp qword [ecx + 16] { double at offset 16 := cosX*cosY } | |
| fchs { negate the remaining sinY*cosX } | |
| fstp qword [ecx] | |
| end; | |
| procedure BuildViewVectorSphereFOV(var xa, ya: Double; v: TPSVec); { Single-precision output: v^ := (-sin(ya)*cos(xa), sin(xa), cos(xa)*cos(ya), 0); eax = @xa, edx = @ya, ecx = v } | |
| asm | |
| fld qword [edx] { angle read through edx, i.e. ya } | |
| fsincos //cosX,sinX X<->Y | |
| fld qword [eax] { angle read through eax, i.e. xa } | |
| fsincos //cosY,sinY,cosX,sinX | |
| fmul st(2), st(0) //cosY,sinY,cosX*cosY,sinX // pano: sinX*cosY, sinY, cosX*cosY | |
| fmulp st(3), st(0) //sinY,cosX*cosY,sinX*cosY | |
| fstp dword [ecx + 4] //cosX*cosY,sinX*cosY | |
| fstp dword [ecx + 8] | |
| fchs { negate the last remaining value } | |
| fstp dword [ecx] | |
| fldz | |
| fstp dword [ecx + 12] { fourth single := 0.0 } | |
| end; | |
| procedure BuildViewVectorFOV(var xa, ya: Double; v: TPSVec); { Single-precision output: v^ := normalize(-sin(ya), sin(xa), cos(xa)*cos(ya)), fourth single 0; eax = @xa, edx = @ya, ecx = v } | |
| asm // -sinY, sinX, cosX*cosY | |
| fld qword [eax] | |
| fsincos //cosX,sinX | |
| fld qword [edx] | |
| fsincos //cosY,sinY,cosX,sinX | |
| fmulp st(2), st(0) //sinY,cosX*cosY,sinX | |
| fchs //x,z,y | |
| fld st(0) //normalize | |
| fmul st(0), st(1) { sinY^2 } | |
| fld st(2) | |
| fmul st(0), st(3) { (cosX*cosY)^2 } | |
| faddp | |
| fld st(3) | |
| fmul st(0), st(4) { sinX^2 } | |
| faddp { st0 = squared length of the three values } | |
| fsqrt | |
| fld1 | |
| fdivrp { st0 = 1 / length } | |
| fmul st(1), st(0) | |
| fmul st(2), st(0) | |
| fmulp st(3), st(0) { all three values scaled by 1 / length, scale factor popped } | |
| fstp dword [ecx] //cosX*cosY,sinX | |
| fstp dword [ecx + 8] //sinX | |
| fstp dword [ecx + 4] | |
| fldz | |
| fstp dword [ecx + 12] { fourth single := 0.0 } | |
| end; | |
| procedure SVectorChangeSign(V1: TPSVec); { Negates the first three singles of V1^ by toggling their IEEE sign bits; eax = V1 } | |
| asm | |
| xor dword ptr [eax], $80000000 { single at offset 0 } | |
| xor dword ptr [eax + 4], $80000000 { single at offset 4 } | |
| xor dword ptr [eax + 8], $80000000 { single at offset 8 } | |
| end; | |
| procedure mAddVecWeight(V1, V2: TPVec3D; const W: Double); { V1^ := V1^ + W * V2^ on three doubles; eax = V1, edx = V2, W (8 bytes) on the stack at [ebp + 8] } | |
| asm | |
| cmp SupportSSE2, 0 { SupportSSE2 = 0: fall back to the x87 path at @@1 } | |
| jz @@1 | |
| movlpd xmm1, [ebp + 8] { low lane = W } | |
| movupd xmm2, [edx] { first two doubles of V2^ } | |
| unpcklpd xmm1, xmm1 { xmm1 = (W, W) } | |
| movupd xmm0, [eax] { first two doubles of V1^ } | |
| mulpd xmm2, xmm1 | |
| mulsd xmm1, [edx + 16] { third double handled as a scalar } | |
| addpd xmm0, xmm2 | |
| addsd xmm1, [eax + 16] | |
| movupd [eax], xmm0 | |
| movsd [eax + 16], xmm1 | |
| pop ebp { hand-written epilogue; relies on the entry code having pushed ebp } | |
| ret 8 { return and drop the 8-byte W argument } | |
| @@1: | |
| fld qword [edx] | |
| fld qword [edx + 8] | |
| fld qword [edx + 16] | |
| fld qword [ebp + 8] { st0 = W, st1..st3 = V2^ doubles at +16, +8, +0 } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp { st1 := st1 * W, W is popped } | |
| fadd qword [eax + 16] | |
| fstp qword [eax + 16] | |
| fadd qword [eax + 8] | |
| fstp qword [eax + 8] | |
| fadd qword [eax] | |
| fstp qword [eax] | |
| end; | |
| procedure mCopyAddVecWeight(V1, V2, V3: TPVec3D; const W: Double); { V1^ := V2^ + W * V3^ on three doubles; eax = V1, edx = V2, ecx = V3, W at [ebp + 8] } | |
| asm //dest,src,add weight | |
| cmp SupportSSE2, 0 { SupportSSE2 = 0: fall back to the x87 path at @@1 } | |
| jz @@1 | |
| movlpd xmm1, [ebp + 8] { low lane = W } | |
| movupd xmm2, [ecx] { first two doubles of V3^ } | |
| unpcklpd xmm1, xmm1 { xmm1 = (W, W) } | |
| movupd xmm0, [edx] { first two doubles of V2^ } | |
| mulpd xmm2, xmm1 | |
| mulsd xmm1, [ecx + 16] { third double handled as a scalar } | |
| addpd xmm0, xmm2 | |
| addsd xmm1, [edx + 16] | |
| movupd [eax], xmm0 | |
| movsd [eax + 16], xmm1 | |
| pop ebp { hand-written epilogue; relies on the entry code having pushed ebp } | |
| ret 8 { return and drop the 8-byte W argument } | |
| @@1: | |
| fld qword [ecx] | |
| fld qword [ecx + 8] | |
| fld qword [ecx + 16] | |
| fld qword [ebp + 8] { st0 = W, st1..st3 = V3^ doubles at +16, +8, +0 } | |
| fmul st(3), st(0) | |
| fmul st(2), st(0) | |
| fmulp { st1 := st1 * W, W is popped } | |
| fadd qword [edx + 16] | |
| fstp qword [eax + 16] | |
| fadd qword [edx + 8] | |
| fstp qword [eax + 8] | |
| fadd qword [edx] | |
| fstp qword [eax] | |
| end; | |
| procedure mCopyVec(Vd, Vs: TPVec3D); { Copies three doubles from Vs^ (edx) to Vd^ (eax) through the x87 stack; every load happens before the first store } | |
| asm | |
| fld qword [edx] { push source doubles in ascending order ... } | |
| fld qword [edx + 8] | |
| fld qword [edx + 16] | |
| fstp qword [eax + 16] { ... and pop them back in descending order } | |
| fstp qword [eax + 8] | |
| fstp qword [eax] | |
| end; | |
| procedure CopyVecSSE2(V1, V2: TPVec3D); //not used | |
| asm { copies 24 bytes from V2^ (edx) to V1^ (eax); no SupportSSE2 check is made here } | |
| movupd xmm0, [edx] { bytes 0..15 } | |
| movlpd xmm1, [edx + 16] { bytes 16..23 } | |
| movupd [eax], xmm0 | |
| movlpd [eax + 16], xmm1 | |
| end; | |
| procedure CopyVec4SSE2(V1, V2: TPVec4D); { Copies 32 bytes from V2^ (edx) to V1^ (eax) with unaligned SSE2 moves; no SupportSSE2 check is made here } | |
| asm | |
| movupd xmm0, [edx] { bytes 0..15 } | |
| movupd xmm1, [edx + 16] { bytes 16..31 } | |
| movupd [eax], xmm0 | |
| movupd [eax + 16], xmm1 | |
| end; | |
| procedure AddSubVecWeightSSE2(V1, V2, V3: TPVec3D; const W: Double); { V1^ := V1^ + W * (V2^ - V3^) on three doubles; eax = V1, edx = V2, ecx = V3, W at [ebp + 8]; SSE2 only, no fallback } | |
| asm | |
| movlpd xmm7, [ebp + 8] | |
| movhpd xmm7, [ebp + 8] { xmm7 = (W, W) } | |
| movupd xmm2, [ecx] { first two doubles of V3^ } | |
| movupd xmm0, [edx] { first two doubles of V2^ } | |
| movlpd xmm1, [edx + 16] { third double of V2^ } | |
| subpd xmm0, xmm2 | |
| subsd xmm1, [ecx + 16] | |
| movupd xmm4, [eax] { first two doubles of V1^ } | |
| mulpd xmm0, xmm7 | |
| mulsd xmm1, xmm7 | |
| addpd xmm0, xmm4 | |
| addsd xmm1, [eax + 16] | |
| movupd [eax], xmm0 | |
| movlpd [eax + 16], xmm1 | |
| end; | |
| function MaxCS(s1, s2: Single): Single; { Returns the larger of s1 and s2 in st(0); s1 is at [ebp + 12], s2 at [ebp + 8] } | |
| asm | |
| fld dword [ebp + 8] | |
| fcomp dword [ebp + 12] { compare s2 with s1 and pop } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld dword [ebp + 8] { s2 is not smaller: return s2 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld dword [ebp + 12] { return s1 } | |
| @end: | |
| end; | |
| function Max0S(s: Single): Single; { Returns s, or 0.0 when the comparison of s against zero sets C0 (s below zero) } | |
| asm | |
| fld dword [ebp + 8] | |
| ftst { compare st0 with 0.0 } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jnc @@1 | |
| fstp st { discard s ... } | |
| fldz { ... and return 0.0 instead } | |
| @@1: | |
| end; | |
| function MinCS(const s1, s2: Single): Single; { Returns the smaller of s1 and s2 in st(0); s1 is at [ebp + 12], s2 at [ebp + 8] } | |
| asm | |
| fld dword [ebp + 8] | |
| fcomp dword [ebp + 12] { compare s2 with s1 and pop } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld dword [ebp + 12] { s2 is not smaller: return s1 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld dword [ebp + 8] { return s2 } | |
| @end: | |
| end; | |
| procedure MinMaxSvar(const smin, smax: Single; var s: Single); { Clamps s in place to the range smin..smax; eax = @s, smin at [ebp + 12], smax at [ebp + 8] } | |
| asm | |
| cmp SupportSSE, 0 { SupportSSE = 0: fall back to the x87 path at @@1 } | |
| jz @@1 | |
| movss xmm0, [eax] | |
| maxss xmm0, [ebp + 12] { raise to smin } | |
| minss xmm0, [ebp + 8] { lower to smax } | |
| movss [eax], xmm0 | |
| pop ebp { hand-written epilogue; relies on the entry code having pushed ebp } | |
| ret 8 { return and drop the two 4-byte stack arguments } | |
| @@1: | |
| mov edx, eax { keep @s, because fnstsw overwrites ax } | |
| fld dword [eax] | |
| fcom dword [ebp + 12] { compare s with smin } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @SminIsSmallerThanS { taken when s is below smin, despite the label name } | |
| fcom dword [ebp + 8] { compare s with smax } | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @end { s is below smax: keep s } | |
| fstp st(0) | |
| fld dword [ebp + 8] { replace s by smax } | |
| jmp @end | |
| @SminIsSmallerThanS: | |
| fstp st(0) | |
| fld dword [ebp + 12] { replace s by smin } | |
| @end: | |
| fstp dword [edx] | |
| end; | |
| function MinMaxCS(const smin, s, smax: Single): Single; { Returns s clamped to smin..smax in st(0); smin at [ebp + 16], s at [ebp + 12], smax at [ebp + 8] } | |
| asm | |
| fld dword [ebp + 12] | |
| fcom dword [ebp + 16] { compare s with smin } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @SminIsSmallerThanS { taken when s is below smin, despite the label name } | |
| fcom dword [ebp + 8] { compare s with smax } | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @end { s is below smax: keep s } | |
| fstp st(0) | |
| fld dword [ebp + 8] { return smax } | |
| jmp @end | |
| @SminIsSmallerThanS: | |
| fstp st(0) | |
| fld dword [ebp + 16] { return smin } | |
| @end: | |
| end; | |
| function Min0MaxCS(const s, smax: Single): Single; { Returns s clamped to 0..smax in st(0); s at [ebp + 12], smax at [ebp + 8] } | |
| asm | |
| fld dword [ebp + 12] | |
| ftst { compare s with 0.0 } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @SminIsSmallerThanS { taken when s is below zero, despite the label name } | |
| fcom dword [ebp + 8] { compare s with smax } | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @end { s is below smax: keep s } | |
| fstp st(0) | |
| fld dword [ebp + 8] { return smax } | |
| jmp @end | |
| @SminIsSmallerThanS: | |
| fstp st(0) | |
| fldz { return 0.0 } | |
| @end: | |
| end; | |
| procedure MaxCDvar(var ds, ddest: Double); { ddest := ds unless ds compares below ddest; eax = @ds, edx = @ddest } | |
| asm | |
| fld qword [eax] | |
| fcom qword [edx] { compare ds with ddest } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @@1 | |
| fstp qword [edx] { ds is not smaller: store it into ddest } | |
| ret | |
| @@1: | |
| fstp st { ds is smaller: just drop it } | |
| end; | |
| procedure Clamp1Svar(var s: Single); { Limits s in place to at most 1.0; eax = @s } | |
| asm | |
| fld1 | |
| mov edx, eax { keep @s, because fnstsw overwrites ax } | |
| fcom dword [eax] { compare 1.0 with s } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jnc @@1 | |
| fstp dword [edx] { 1.0 is below s: overwrite s with 1.0 } | |
| ret | |
| @@1: | |
| fstp st { otherwise drop the 1.0 and leave s unchanged } | |
| end; | |
| function Min0MaxCD(const d, dmax: Double): Double; { Returns d clamped to 0..dmax in st(0); d at [ebp + 16], dmax at [ebp + 8] } | |
| asm | |
| fld qword [ebp + 16] | |
| ftst { compare d with 0.0 } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @@1 | |
| fcom qword [ebp + 8] { compare d with dmax } | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @end { d is below dmax: keep d } | |
| fstp st(0) | |
| fld qword [ebp + 8] { return dmax } | |
| jmp @end | |
| @@1: | |
| fstp st(0) | |
| fldz { d is below zero: return 0.0 } | |
| @end: | |
| end; | |
| function MinCD(const s1, s2: Double): Double; { Returns the smaller of s1 and s2 in st(0); s1 at [ebp + 16], s2 at [ebp + 8] } | |
| asm | |
| fld qword [ebp + 8] | |
| fcomp qword [ebp + 16] { compare s2 with s1 and pop } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld qword [ebp + 16] { s2 is not smaller: return s1 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld qword [ebp + 8] { return s2 } | |
| @end: | |
| end; | |
| function MaxCD(const s1, s2: Double): Double; { Returns the larger of s1 and s2 in st(0); s1 at [ebp + 16], s2 at [ebp + 8] } | |
| asm | |
| fld qword [ebp + 8] | |
| fcomp qword [ebp + 16] { compare s2 with s1 and pop } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld qword [ebp + 8] { s2 is not smaller: return s2 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld qword [ebp + 16] { return s1 } | |
| @end: | |
| end; | |
| function MaxAbsCD(const s1, s2: Double): Double; { Returns whichever of s1, s2 has the larger magnitude, with its original sign; s1 at [ebp + 16], s2 at [ebp + 8] } | |
| asm | |
| fld qword [ebp + 16] | |
| fabs { |s1| } | |
| fld qword [ebp + 8] | |
| fabs { |s2| } | |
| fcompp { compare |s2| with |s1| and pop both } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld qword [ebp + 8] { |s2| is not smaller: return s2 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld qword [ebp + 16] { return s1 } | |
| @end: | |
| end; | |
| function MinAbsCD(const s1, s2: Double): Double; { Returns whichever of s1, s2 has the smaller magnitude, with its original sign; s1 at [ebp + 16], s2 at [ebp + 8] } | |
| asm | |
| fld qword [ebp + 16] | |
| fabs { |s1| } | |
| fld qword [ebp + 8] | |
| fabs { |s2| } | |
| fcompp { compare |s2| with |s1| and pop both } | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @S2isSmallerThanS1 | |
| fld qword [ebp + 16] { |s2| is not smaller: return s1 } | |
| jmp @end | |
| @S2isSmallerThanS1: | |
| fld qword [ebp + 8] { return s2 } | |
| @end: | |
| end; | |
| procedure SinCosD(const a: Double; var Sin, Cos: Double); { Computes sine and cosine of a with one fsincos; eax = @Sin, edx = @Cos, a on the stack } | |
| asm | |
| fld a | |
| fsincos { leaves cosine in st0 and sine in st1 } | |
| fstp qword ptr [edx] // Cos | |
| fstp qword ptr [eax] // Sin | |
| end; | |
| procedure SinCosS(const a: Double; var Sin, Cos: Single); { Computes sine and cosine of a with one fsincos and stores them as singles; eax = @Sin, edx = @Cos } | |
| asm | |
| fld a | |
| fsincos { leaves cosine in st0 and sine in st1 } | |
| fstp dword ptr [edx] // Cos | |
| fstp dword ptr [eax] // Sin | |
| end; | |
| function FracSingle(const s: Single): Single; { Returns s minus s rounded toward zero, i.e. the fractional part with the sign of s, in st(0) } | |
| asm | |
| fld s //ebp+8 | |
| fld st(0) { second copy, to be rounded } | |
| sub esp, 4 { 4 scratch bytes: two copies of the FPU control word } | |
| fnstcw [esp].word // save | |
| fnstcw [esp + 2].word // scratch | |
| or [esp + 2].word, $0F00 // trunc toward zero, full precision | |
| fldcw [esp + 2].word | |
| frndint { st0 = s truncated under the temporary control word } | |
| fldcw [esp].word { restore the caller's control word } | |
| add esp, 4 | |
| fsubp { st0 = s - trunc(s) } | |
| end; | |
| function MonitorComponent(Component: TComponent): Boolean; | |
| // ... | |
| asm { excerpt: captures the dword stored just above the saved ebp } | |
| mov eax,[ebp+4] { with a standard ebp frame this is the routine's return address - TODO confirm frame layout } | |
| mov Addr,eax { keep it in Addr (declared in the elided part) } | |
| end; | |
| // ... | |
| constructor TMonitorObject.Create; | |
| // ... | |
| asm { excerpt: captures the dword stored just above the saved ebp } | |
| mov eax,[ebp+4] { with a standard ebp frame this is the routine's return address - TODO confirm frame layout } | |
| mov Addr,eax { keep it in Addr (declared in the elided part) } | |
| end; | |
| // ... | |
| procedure GetMem(var P; Size: Integer); | |
| // ... | |
| asm { excerpt: captures the dword stored just above the saved ebp } | |
| mov eax,[ebp+4] { with a standard ebp frame this is the routine's return address - TODO confirm frame layout } | |
| mov Addr,eax { keep it in Addr (declared in the elided part) } | |
| end; | |
| // ... | |
| procedure BuildATlevels(MWidth, MHeight: Integer); | |
| // ... | |
| asm { excerpt 1: MMX pass writing rounded word averages of three sources, four words per iteration, from PATL2 into PATL } | |
| push eax | |
| push ebx | |
| push ecx | |
| push esi | |
| push edi | |
| mov ebx, iStep2 | |
| mov ecx, MWidth | |
| mov esi, PATL2 | |
| sub ecx, ebx | |
| mov edi, PATL | |
| shr ecx, 2 { ecx = (MWidth - iStep2) div 4 loop iterations } | |
| sub esi, ebx { source starts iStep2 bytes before PATL2 } | |
| mov eax, ecx | |
| sub edi, esi { edi = PATL - esi, so [edi + esi] walks the destination as esi advances } | |
| shl eax, 2 | |
| add x2, eax { x2 advances by 4 * iteration count } | |
| @ll: movq mm0, [esi] // calculate 4 words at once | |
| pavgw mm0, [esi + ebx * 2] { average of the two outer sources ... } | |
| pavgw mm0, [esi + ebx] { ... averaged again with the middle source } | |
| movq [edi + esi], mm0 | |
| add esi, 8 | |
| dec ecx | |
| jnz @ll { NOTE(review): a zero initial count would wrap; presumably guarded by the elided Pascal code } | |
| add edi, esi | |
| mov PATL, edi { write back the advanced destination pointer } | |
| add esi, ebx | |
| mov PATL2, esi { write back the advanced source pointer } | |
| pop edi | |
| pop esi | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| asm { excerpt 2: same averaging walked in steps of 2 * MWidth bytes, in three phases } | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| movq mm1, W4tmp { constant substituted for the [esi] source in phase 1 } | |
| mov ebx, MWidth2step | |
| mov esi, PATL2 | |
| mov edi, PATL | |
| mov ecx, iStep | |
| mov edx, MWidth | |
| sub esi, ebx | |
| add edx, edx { edx = 2 * MWidth, the per-iteration pointer advance } | |
| sub edi, esi { [edi + esi] addresses the destination } | |
| dec ecx { dec/jns loops run iStep times } | |
| @l1: movq mm0, [esi + ebx * 2] | |
| pavgw mm0, mm1 | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l1 | |
| mov ecx, MHeight | |
| sub ecx, iStep2 | |
| dec ecx | |
| js @u2 { skip phase 2 when MHeight - iStep2 is below 1 } | |
| @l2: movq mm0, [esi] { phase 2: all three sources come from memory } | |
| pavgw mm0, [esi + ebx * 2] | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l2 | |
| @u2: | |
| movq mm1, W4tmp2 { constant substituted for the [esi + ebx * 2] source in phase 3 } | |
| mov ecx, iStep | |
| dec ecx | |
| @l3: movq mm0, [esi] | |
| pavgw mm0, mm1 | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l3 | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| asm | |
| emms { leave MMX state so the x87 registers are usable again } | |
| end; | |
| // ... | |
| procedure TAmbShadowCalc.Execute; | |
| // ... | |
| asm // stmxcsr i | |
| stmxcsr x { stores the current MXCSR register into x; nothing is modified } | |
| end; // if i<>$1f80 then i:=0; //=8064 | |
| // ... | |
| function BuildATlevels(PsiLight, MWidth, MHeight: Integer; PATlevel: TPATlevel; var CorrMul: Single; var Zsub: Integer): Integer; | |
| // ... | |
| asm | |
| emms { leave MMX state so the x87 registers are usable again } | |
| end; | |
| // ... | |
| asm { excerpt: MMX pass writing rounded word averages of three sources, four words per iteration, from PATL2 into PATL } | |
| push eax | |
| push ebx | |
| push ecx | |
| push esi | |
| push edi | |
| mov ebx, iStep2 | |
| mov ecx, MWidth | |
| mov esi, PATL2 | |
| sub ecx, ebx | |
| mov edi, PATL | |
| shr ecx, 2 { ecx = (MWidth - iStep2) div 4 loop iterations } | |
| sub esi, ebx { source starts iStep2 bytes before PATL2 } | |
| mov eax, ecx | |
| sub edi, esi { edi = PATL - esi, so [edi + esi] walks the destination as esi advances } | |
| shl eax, 2 | |
| add x2, eax { x2 advances by 4 * iteration count } | |
| @ll: movq mm0, [esi] // calculate 4 words at once | |
| pavgw mm0, [esi + ebx * 2] { average of the two outer sources ... } | |
| pavgw mm0, [esi + ebx] { ... averaged again with the middle source } | |
| movq [edi + esi], mm0 | |
| add esi, 8 | |
| dec ecx | |
| jnz @ll { NOTE(review): a zero initial count would wrap; presumably guarded by the elided Pascal code } | |
| add edi, esi | |
| mov PATL, edi { write back the advanced destination pointer } | |
| add esi, ebx | |
| mov PATL2, esi { write back the advanced source pointer } | |
| pop edi | |
| pop esi | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| asm { excerpt: same averaging walked in steps of 2 * MWidth bytes, in three phases } | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| movq mm1, W4tmp { constant substituted for the [esi] source in phase 1 } | |
| mov ebx, MWidth2step | |
| mov esi, PATL2 | |
| mov edi, PATL | |
| mov ecx, iStep | |
| mov edx, MWidth | |
| sub esi, ebx | |
| add edx, edx { edx = 2 * MWidth, the per-iteration pointer advance } | |
| sub edi, esi { [edi + esi] addresses the destination } | |
| dec ecx { dec/jns loops run iStep times } | |
| @l1: movq mm0, [esi + ebx * 2] | |
| pavgw mm0, mm1 | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l1 | |
| mov ecx, MHeight | |
| sub ecx, iStep2 | |
| dec ecx | |
| js @u2 { skip phase 2 when MHeight - iStep2 is below 1 } | |
| @l2: movq mm0, [esi] { phase 2: all three sources come from memory } | |
| pavgw mm0, [esi + ebx * 2] | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l2 | |
| @u2: | |
| movq mm1, W4tmp2 { constant substituted for the [esi + ebx * 2] source in phase 3 } | |
| mov ecx, iStep | |
| dec ecx | |
| @l3: movq mm0, [esi] | |
| pavgw mm0, mm1 | |
| pavgw mm0, [esi + ebx] | |
| movq [edi + esi], mm0 | |
| add esi, edx | |
| dec ecx | |
| jns @l3 | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| asm | |
| emms { leave MMX state so the x87 registers are usable again } | |
| end; | |
| // ... | |
| procedure TAmbShadowCalc.Execute; | |
| // ... | |
| asm | |
| stmxcsr x //stores MXCSR into x (read only; this instruction does not set the SSE rounding mode) | |
| end; // if i<>$1f80 then i:=0; //=8064 } | |
| // ... | |
| asm { excerpt: four words at PATL minus zp4, scaled by rsqrt(RadS), capped at sZRTLev, max-merged into a run of AngMaxArr4 entries } | |
| push eax | |
| push ebx | |
| push ecx | |
| push esi | |
| push edi | |
| movss xmm7, RM | |
| mov esi, PATL | |
| lea edi, zp4 | |
| cvtsi2ss xmm4, RadS | |
| movss xmm5, sZRTLev //sZRT | |
| rsqrtss xmm4, xmm4 { approximate 1 / sqrt(RadS) } | |
| movzx eax, word [esi] | |
| movzx ebx, word [esi + 2] | |
| mulss xmm7, xmm4 { RM / sqrt(RadS) } | |
| sub eax, [edi] | |
| sub ebx, [edi + 4] | |
| cvtss2si ecx, xmm7 //iC | |
| shufps xmm4, xmm4, 0 //R1d | |
| cvtsi2ss xmm0, eax | |
| cvtsi2ss xmm1, ebx | |
| movzx eax, word [esi + 4] | |
| movzx ebx, word [esi + 6] | |
| sub eax, [edi + 8] | |
| sub ebx, [edi + 12] | |
| cvtsi2ss xmm2, eax | |
| cvtsi2ss xmm3, ebx | |
| shufps xmm0, xmm1, 0 | |
| shufps xmm2, xmm3, 0 | |
| shufps xmm5, xmm5, 0 { broadcast sZRTLev } | |
| shufps xmm0, xmm2, $88 { xmm0 = the four differences in order } | |
| mov eax, iAngC | |
| mov ebx, ecx | |
| shr ebx, 1 | |
| sub eax, ebx | |
| and eax, 31 { start index = (iAngC - iC div 2) and 31 } | |
| add eax, eax | |
| mulps xmm0, xmm4 | |
| lea esi, [AngMaxArr4 + eax * 8] { 16 bytes per entry } | |
| minps xmm0, xmm5 | |
| @ll: movups xmm1, [esi] | |
| maxps xmm1, xmm0 | |
| movups [esi], xmm1 | |
| add esi, 16 | |
| dec ecx | |
| jns @ll { runs iC + 1 times; NOTE(review): no index wrap inside the loop - confirm AngMaxArr4 has room past entry 31 } | |
| pop edi | |
| pop esi | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| asm { excerpt: same computation, but only the single entry AngMaxArr4[iAngC] is max-merged } | |
| push eax | |
| push ebx | |
| push esi | |
| push edi | |
| mov esi, PATL | |
| lea edi, zp4 | |
| cvtsi2ss xmm4, RadS | |
| movzx eax, word [esi] | |
| movzx ebx, word [esi + 2] | |
| sub eax, [edi] | |
| sub ebx, [edi + 4] | |
| shufps xmm4, xmm4, 0 | |
| movss xmm5, sZRTLev | |
| cvtsi2ss xmm0, eax | |
| cvtsi2ss xmm1, ebx | |
| rsqrtps xmm4, xmm4 //only 4..6 clocks, not slower than scalar | |
| movzx eax, word [esi + 4] | |
| movzx ebx, word [esi + 6] | |
| sub eax, [edi + 8] | |
| sub ebx, [edi + 12] | |
| cvtsi2ss xmm2, eax | |
| cvtsi2ss xmm3, ebx | |
| shufps xmm5, xmm5, 0 | |
| shufps xmm0, xmm1, 0 | |
| shufps xmm2, xmm3, 0 | |
| mov eax, iAngC | |
| shufps xmm0, xmm2, $88 { xmm0 = the four differences in order } | |
| add eax, eax | |
| mulps xmm0, xmm4 | |
| movups xmm1, dqword [AngMaxArr4 + eax * 8] { 16 bytes per entry; iAngC is used without masking here } | |
| minps xmm0, xmm5 | |
| maxps xmm1, xmm0 | |
| movups dqword [AngMaxArr4 + eax * 8], xmm1 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| function BuildATlevelsT0(PsiLight, MWidth, MHeight: Integer; PATlevel: TPATlevel; sZRT: Single): Integer; | |
| // ... | |
| asm { excerpt: MMX loop over groups of four words while xa + 4 <= iwids, clamping the two outer sources before averaging } | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| mov ecx, xa | |
| mov edx, PATL2 | |
| mov eax, iStep2 | |
| add ecx, 4 | |
| mov ebx, PATL | |
| sub edx, eax { source starts iStep2 bytes before PATL2 } | |
| @@1: cmp ecx, iwids | |
| jg @@3 | |
| movq mm4, [edx + eax] { middle source } | |
| movq mm1, [edx] //it1 | |
| paddw mm4, iTh4 //PATL2^ + iTh | |
| psubw mm4, sub32k { bias before the signed minimum; presumably sub32k holds $8000 per word - TODO confirm } | |
| movq mm2, [edx + eax * 2] //it2 | |
| psubw mm1, sub32k | |
| psubw mm2, sub32k | |
| pminsw mm1, mm4 //only signed word, therefore first sub, afterwards add | |
| pminsw mm2, mm4 | |
| paddw mm1, sub32k | |
| paddw mm2, sub32k | |
| pavgw mm1, mm2 //Average unsigned words | |
| pavgw mm1, [edx + eax] { averaged again with the unmodified middle source } | |
| movq [ebx], mm1 | |
| add ebx, 8 | |
| add edx, 8 | |
| add ecx, 4 | |
| jmp @@1 | |
| @@3: sub ecx, 4 { undo the look-ahead added before the loop } | |
| add edx, eax | |
| mov xa, ecx | |
| mov PATL, ebx | |
| mov PATL2, edx | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; // xa | |
| // ... | |
| asm | |
| emms { leave MMX state so the x87 registers are usable again } | |
| end; | |
| // ... | |
| procedure TAmbShadowCalcT0.Execute; | |
| // ... | |
| asm | |
| stmxcsr x { stores the current MXCSR register into x; nothing is modified } | |
| end; // if i<>$1f80 then i:=0; //=8064 } | |
| // ... | |
| asm { excerpt: v = min((PATL words - zp4) * rsqrt(RadS), sZRTLev); v - v * (v / sZRTLev)^3 is max-merged into a run of AngMaxArr4 entries } | |
| push eax | |
| push ebx | |
| push ecx | |
| push esi | |
| push edi | |
| movss xmm7, RM | |
| mov esi, PATL | |
| lea edi, zp4 | |
| cvtsi2ss xmm4, RadS | |
| movss xmm5, sZRTLev | |
| rsqrtss xmm4, xmm4 { approximate 1 / sqrt(RadS) } | |
| movzx eax, word [esi] | |
| movzx ebx, word [esi + 2] | |
| mulss xmm7, xmm4 { RM / sqrt(RadS) } | |
| sub eax, [edi] | |
| sub ebx, [edi + 4] | |
| cvtss2si ecx, xmm7 //iC | |
| shufps xmm4, xmm4, 0 //R1d | |
| cvtsi2ss xmm0, eax | |
| cvtsi2ss xmm1, ebx | |
| movzx eax, word [esi + 4] | |
| movzx ebx, word [esi + 6] | |
| sub eax, [edi + 8] | |
| sub ebx, [edi + 12] | |
| cvtsi2ss xmm2, eax | |
| cvtsi2ss xmm3, ebx | |
| shufps xmm0, xmm1, 0 | |
| shufps xmm2, xmm3, 0 | |
| shufps xmm5, xmm5, 0 { broadcast sZRTLev } | |
| shufps xmm0, xmm2, $88 { xmm0 = the four differences in order } | |
| mov eax, iAngC | |
| mov ebx, ecx | |
| shr ebx, 1 | |
| sub eax, ebx | |
| and eax, 31 { start index = (iAngC - iC div 2) and 31 } | |
| add eax, eax | |
| mulps xmm0, xmm4 | |
| minps xmm0, xmm5 | |
| rcpps xmm2, xmm5 //approx 1/x | |
| lea esi, [AngMaxArr4 + eax * 8] { 16 bytes per entry } | |
| mulps xmm2, xmm0 { t = v / sZRTLev } | |
| movaps xmm4, xmm2 | |
| mulps xmm2, xmm2 | |
| mulps xmm2, xmm4 { t^3 } | |
| mulps xmm2, xmm0 | |
| subps xmm0, xmm2 { v - v * t^3 } | |
| @ll: movups xmm1, [esi] | |
| maxps xmm1, xmm0 | |
| movups [esi], xmm1 | |
| add esi, 16 | |
| dec ecx | |
| jns @ll { runs iC + 1 times; NOTE(review): no index wrap inside the loop - confirm AngMaxArr4 has room past entry 31 } | |
| pop edi | |
| pop esi | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| asm { excerpt: same computation, but only the single entry AngMaxArr4[iAngC] is max-merged } | |
| push eax | |
| push ebx | |
| push esi | |
| push edi | |
| mov esi, PATL | |
| lea edi, zp4 | |
| cvtsi2ss xmm4, RadS | |
| movzx eax, word [esi] | |
| movzx ebx, word [esi + 2] | |
| sub eax, [edi] | |
| sub ebx, [edi + 4] | |
| shufps xmm4, xmm4, 0 | |
| movss xmm5, sZRTLev | |
| cvtsi2ss xmm0, eax | |
| cvtsi2ss xmm1, ebx | |
| rsqrtps xmm4, xmm4 //only 4..6 clocks, not slower than scalar | |
| movzx eax, word [esi + 4] | |
| movzx ebx, word [esi + 6] | |
| sub eax, [edi + 8] | |
| sub ebx, [edi + 12] | |
| cvtsi2ss xmm2, eax | |
| cvtsi2ss xmm3, ebx | |
| shufps xmm5, xmm5, 0 | |
| shufps xmm0, xmm1, 0 | |
| shufps xmm2, xmm3, 0 | |
| mov eax, iAngC | |
| shufps xmm0, xmm2, $88 { xmm0 = the four differences in order } | |
| add eax, eax | |
| mulps xmm0, xmm4 | |
| minps xmm0, xmm5 | |
| movups xmm1, dqword [AngMaxArr4 + eax * 8] { 16 bytes per entry; iAngC is used without masking here } | |
| rcpps xmm2, xmm5 { approximate 1 / sZRTLev } | |
| mulps xmm2, xmm0 { t = v / sZRTLev } | |
| movaps xmm4, xmm2 | |
| mulps xmm2, xmm2 | |
| mulps xmm2, xmm4 { t^3 } | |
| mulps xmm2, xmm0 | |
| subps xmm0, xmm2 { v - v * t^3 } | |
| @up: maxps xmm1, xmm0 | |
| movups dqword [AngMaxArr4 + eax * 8], xmm1 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| function ColToSVecFlipRBc(c: Cardinal): TSVec; { Unpacks c into singles: Result[0] = bits 16..23, Result[1] = bits 8..15, Result[2] = bits 0..7; eax = c, edx = @Result } | |
| asm | |
| add esp, -4 { 4-byte scratch slot so fild can read the integer from memory } | |
| mov ecx, eax | |
| shr ecx, 16 | |
| and ecx, $FF | |
| mov [esp], ecx | |
| fild dword [esp] | |
| fstp dword [edx] | |
| mov ecx, eax | |
| shr ecx, 8 | |
| and ecx, $FF | |
| mov [esp], ecx | |
| fild dword [esp] | |
| fstp dword [edx + 4] | |
| and eax, $FF | |
| mov [esp], eax | |
| fild dword [esp] | |
| fstp dword [edx + 8] { the fourth single of Result is not written } | |
| pop edx { releases the scratch slot } | |
| end; | |
| function ColAToSVecFlipRBc(c: Cardinal): TSVec; { Unpacks c into four singles: Result[3] = bits 24..31, Result[0] = bits 16..23, Result[1] = bits 8..15, Result[2] = bits 0..7; eax = c, edx = @Result } | |
| asm | |
| mov ecx, eax | |
| shr ecx, 24 | |
| push ecx { the push doubles as the 4-byte scratch slot for fild } | |
| fild dword [esp] | |
| fstp dword [edx + 12] | |
| mov ecx, eax | |
| shr ecx, 16 | |
| and ecx, $FF | |
| mov [esp], ecx | |
| fild dword [esp] | |
| fstp dword [edx] | |
| mov ecx, eax | |
| shr ecx, 8 | |
| and ecx, $FF | |
| mov [esp], ecx | |
| fild dword [esp] | |
| fstp dword [edx + 4] | |
| and eax, $FF | |
| mov [esp], eax | |
| fild dword [esp] | |
| fstp dword [edx + 8] | |
| pop edx { releases the scratch slot } | |
| end; | |
| function SVecToColNoScale(sv: TSVec): Cardinal; { Clamps sv through mMinMaxSVec with bounds 0 and 255.0, then packs singles 0, 1, 2 into bytes 0, 1, 2 of the result } | |
| asm | |
| add esp, -16 { 16-byte temporary that receives the clamped vector } | |
| push 0 { first stack argument: bit pattern of 0.0 } | |
| push $437f0000 { second stack argument: bit pattern of 255.0 } | |
| lea edx, [esp + 8] { edx = address of the temporary, past the two pushed arguments } | |
| call [mMinMaxSVec] | |
| fld dword [esp] | |
| fistp word [esp] { each 16-bit store overlaps the previous one by a byte, leaving one byte per component } | |
| fld dword [esp + 4] | |
| fistp word [esp + 1] | |
| fld dword [esp + 8] | |
| fistp word [esp + 2] | |
| mov eax, [esp] | |
| add esp, 16 | |
| end; | |
| function SVecToColNoScaleFlipXZ(var sv: TSVec): Cardinal; { Clamps sv through mMinMaxSVec with bounds 0 and 255.0, then packs singles 2, 1, 0 into bytes 0, 1, 2 of the result } | |
| asm | |
| add esp, -16 { 16-byte temporary that receives the clamped vector } | |
| push 0 { first stack argument: bit pattern of 0.0 } | |
| push $437f0000 { second stack argument: bit pattern of 255.0 } | |
| lea edx, [esp + 8] //2x pushed, +8 is esp..esp+16 for svec | |
| call [mMinMaxSVec] //mMinMaxSVec(const smin, smax: Single; const V1: TSVec): TSVec; ret8 | |
| fld dword [esp + 8] // ebp+12, ebp+8, eax edx | |
| fistp word [esp + 8] { bytes are packed into the slot of the third single, already consumed } | |
| fld dword [esp + 4] | |
| fistp word [esp + 9] { each 16-bit store overlaps the previous one by a byte } | |
| fld dword [esp] | |
| fistp word [esp + 10] | |
| mov eax, [esp + 8] | |
| add esp, 16 | |
| end; | |
| procedure MinMaxClip15bit(var s: Single; var w: Word); { w := s clamped to 0..32767 and converted to an integer word; eax = @s, edx = @w } | |
| const s32767: Single = 32767; | |
| asm | |
| cmp SupportSSE, 0 { SupportSSE = 0: fall back to the x87 path at @@1 } | |
| jz @@1 | |
| movss xmm0, [eax] | |
| xorps xmm1, xmm1 { xmm1 = 0.0 } | |
| minss xmm0, s32767 | |
| maxss xmm0, xmm1 | |
| cvtss2si eax, xmm0 | |
| mov word [edx], ax | |
| ret | |
| @@1: | |
| fld dword [eax] | |
| ftst { compare s with 0.0 } | |
| fnstsw ax | |
| and ah, 41H { keeps condition bits C3 and C0 } | |
| jz @biggerThanZero { both clear: s is above zero } | |
| fstp st(0) | |
| mov word [edx], 0 | |
| jmp @e | |
| @biggerThanZero: | |
| fcom s32767 | |
| fnstsw ax | |
| shr ah, 1 { moves condition bit C0 into the carry flag } | |
| jc @SmallerThanS3 | |
| fstp st(0) | |
| mov word [edx], 32767 { s is not below 32767: saturate } | |
| jmp @e | |
| @SmallerThanS3: | |
| fistp word [edx] { in range: convert and store } | |
| @e: | |
| end; | |
| function CPUID_Supported: Boolean; { Returns True (al <> 0) when bit 21 of EFLAGS can be toggled } | |
| asm | |
| pushfd | |
| pop eax { eax = current EFLAGS } | |
| mov edx, eax { keep the original value for the comparison } | |
| xor eax, $200000 { flip bit 21 } | |
| push eax | |
| popfd { try to write the modified flags } | |
| pushfd | |
| pop eax { read the flags back } | |
| xor eax, edx { non-zero when the flip was retained } | |
| setnz al | |
| end; | |
| function GetCPUID(AInfoRequired: Integer): TRegisters; { Executes CPUID for leaf AInfoRequired (sub-leaf 0) and returns EAX, EBX, ECX, EDX in Result; eax = AInfoRequired, edx = @Result } | |
| asm | |
| push ebx { ebx and esi must be preserved for the caller } | |
| push esi | |
| mov esi, edx { keep @Result, because cpuid overwrites edx } | |
| xor ecx, ecx { fixed: select sub-leaf 0 instead of passing whatever ecx happened to hold } | |
| cpuid | |
| mov TRegisters[esi].RegEAX, eax | |
| mov TRegisters[esi].RegEBX, ebx | |
| mov TRegisters[esi].RegECX, ecx | |
| mov TRegisters[esi].RegEDX, edx | |
| pop esi | |
| pop ebx | |
| end; | |
| procedure FastMove(const Source; var Dest; count: Integer); { Copies count bytes from Source (eax) to Dest (edx) using 8-byte x87 integer loads/stores; count in ecx; does nothing when Source = Dest or count < 0 } | |
| asm | |
| cmp eax, edx | |
| je @@Exit | |
| cmp ecx, 32 | |
| ja @@LargeMove //Count > 32 or Count < 0 | |
| sub ecx, 8 | |
| jg @@SmallMove | |
| @@TinyMove: //0..8 Byte Move | |
| jmp dword [@@JumpTable + 32 + ecx * 4] { ecx = count - 8 here, so + 32 re-bases the index to 0..8 } | |
| @@SmallMove: //9..32 Byte Move | |
| fild qword [eax + ecx] { last 8 bytes } | |
| fild qword [eax] { first 8 bytes } | |
| cmp ecx, 8 | |
| jle @@Small16 | |
| fild qword [eax + 8] { second 8 bytes } | |
| cmp ecx, 16 | |
| jle @@Small24 | |
| fild qword [eax + 16] { third 8 bytes } | |
| fistp qword [edx + 16] | |
| @@Small24: | |
| fistp qword [edx + 8] | |
| @@Small16: | |
| fistp qword [edx] | |
| fistp qword [edx + ecx] { the last 8 bytes may overlap the earlier stores } | |
| @@Exit: | |
| ret | |
| nop //4-Byte Align JumpTable | |
| nop | |
| @@JumpTable: | |
| dd @@Exit, @@M01, @@M02, @@M03, @@M04, @@M05, @@M06, @@M07, @@M08 | |
| @@LargeForwardMove: | |
| push edx { original Dest, used for the final store of the first 8 bytes } | |
| fild qword [eax] { first 8 bytes } | |
| lea eax, [eax + ecx - 8] { eax = address of the last 8 source bytes } | |
| lea ecx, [ecx + edx - 8] { ecx = address of the last 8 destination bytes } | |
| fild qword [eax] //fp stack check error | |
| push ecx | |
| neg ecx | |
| and edx, -8 { round Dest down to a multiple of 8 for the loop stores } | |
| lea ecx, [ecx + edx + 8] { negative offset that counts up towards zero } | |
| pop edx { edx = address of the last 8 destination bytes } | |
| @FwdLoop: | |
| fild qword [eax + ecx] | |
| fistp qword [edx + ecx] | |
| add ecx, 8 | |
| jl @FwdLoop | |
| fistp qword [edx] { store the last 8 bytes } | |
| pop edx | |
| fistp qword [edx] { store the first 8 bytes } | |
| ret | |
| @@LargeMove: | |
| jng @@LargeDone // Count < 0 | |
| cmp eax, edx | |
| ja @@LargeForwardMove { Source above Dest: forward copy } | |
| sub edx, ecx | |
| cmp eax, edx | |
| lea edx, [edx + ecx] { restore edx without touching the flags } | |
| jna @@LargeForwardMove { Source + count <= Dest: forward copy } | |
| sub ecx, 8 { otherwise copy backwards } | |
| push ecx | |
| fild qword [eax + ecx] { last 8 bytes } | |
| fild qword [eax] { first 8 bytes } | |
| add ecx, edx | |
| and ecx, -8 { round the store address down to a multiple of 8 } | |
| sub ecx, edx | |
| @BwdLoop: | |
| fild qword [eax + ecx] | |
| fistp qword [edx + ecx] | |
| sub ecx, 8 | |
| jg @BwdLoop | |
| pop ecx | |
| fistp qword [edx] { store the first 8 bytes } | |
| fistp qword [edx + ecx] { store the last 8 bytes } | |
| @@LargeDone: | |
| ret | |
| @@M01: | |
| movzx ecx, [eax] | |
| mov [edx], cl | |
| ret | |
| @@M02: | |
| movzx ecx, word [eax] | |
| mov [edx], cx | |
| ret | |
| @@M03: | |
| mov cx, [eax] | |
| mov al, [eax + 2] | |
| mov [edx], cx | |
| mov [edx + 2], al | |
| ret | |
| @@M04: | |
| mov ecx, [eax] | |
| mov [edx], ecx | |
| ret | |
| @@M05: | |
| mov ecx, [eax] | |
| mov al, [eax + 4] | |
| mov [edx], ecx | |
| mov [edx + 4], al | |
| ret | |
| @@M06: | |
| mov ecx, [eax] | |
| mov ax, [eax + 4] | |
| mov [edx], ecx | |
| mov [edx + 4], ax | |
| ret | |
| @@M07: | |
| mov ecx, [eax] | |
| mov eax, [eax + 3] { two overlapping dwords cover the 7 bytes } | |
| mov [edx], ecx | |
| mov [edx + 3], eax | |
| ret | |
| @@M08: | |
| fild qword [eax] | |
| fistp qword [edx] | |
| end; | |
| procedure fill0bytes(const p: Pointer; const anz: Integer; const useSSE: Boolean); | |
| // ... | |
| asm { excerpt: writes x4 blocks of 16 zero bytes starting at p1, then stores the advanced pointer back into p1 } | |
| push eax | |
| push ecx | |
| mov ecx, x4 | |
| mov eax, p1 | |
| xorps xmm0, xmm0 { xmm0 = 16 zero bytes } | |
| @loop: movaps [eax], xmm0 { movaps requires p1 to be 16-byte aligned } | |
| add eax, 16 | |
| sub ecx, 1 | |
| jnz @loop { NOTE(review): x4 = 0 would wrap the counter; presumably guarded by the elided Pascal code } | |
| mov p1, eax | |
| pop ecx | |
| pop eax | |
| end; | |
| // ... | |
| procedure doFFT(const d: Double); | |
| // ... | |
| asm { excerpt: SSE2 butterfly loops over the arrays at pFFTreal / pFFTimag, with factors read from pFFTcos / pFFTsin and the sine scaled by d } | |
| push edx | |
| push ecx | |
| push ebx | |
| push eax | |
| push esi | |
| push edi | |
| mov ebx, pFFTreal | |
| mov ecx, pFFTimag | |
| movsd xmm7, d | |
| shufpd xmm7, xmm7, 0 { xmm7 = (d, d) } | |
| mov eax, l | |
| @loo0: shl eax, 1 // while l<=fl2 | |
| mov edi, eax // was: ischritt, eax | |
| xor eax, eax // eax=m | |
| mov tabnr, eax | |
| @loo1: mov edx, tabnr // for m:=0 to l-1 | |
| mov esi, pFFTcos | |
| movlpd xmm3, [esi + edx * 8] | |
| mov esi, pFFTsin | |
| movlpd xmm4, [esi + edx * 8] | |
| shufpd xmm3, xmm3, 0 // xmm3 = [wichreal, wichreal] | |
| shufpd xmm4, xmm4, 0 // xmm4 = [wichimag, wichimag] | |
| mulpd xmm4, xmm7 // xorpd xmm4, [sign] | |
| mov edx, eax // edx=i=m | |
| @loo2: mov esi, edx | |
| add esi, l // j=i+l | |
| movlpd xmm0, [ebx + esi * 8] // hi lo | |
| movhpd xmm0, [ecx + esi * 8] // xmm0 = [imag, real] | |
| movapd xmm1, xmm0 | |
| shufpd xmm1, xmm1, 1 // xmm1 = [real, imag] (,1=swap) | |
| mulpd xmm0, xmm3 // xmm0 = [imag*wichreal, real*wichreal] | |
| mulpd xmm1, xmm4 // xmm1 = [real*wichimag, imag*wichimag] | |
| movapd xmm2, xmm0 | |
| addpd xmm0, xmm1 // xmm0 = [i*wr+r*wi, r*wr+i*wi] | |
| subpd xmm2, xmm1 // xmm2 = [i*wr-r*wi, r*wr-i*wi] | |
| shufpd xmm2, xmm0, 2 // xmm2 = [i*wr+r*wi, r*wr-i*wi]? | |
| // tmpimag tmpreal | |
| movlpd xmm0, [ebx + edx * 8] | |
| movhpd xmm0, [ecx + edx * 8] // xmm0 = [imag_i, real_i] | |
| movapd xmm1, xmm0 | |
| subpd xmm0, xmm2 { element j := element i - tmp } | |
| addpd xmm1, xmm2 { element i := element i + tmp } | |
| movlpd [ebx + esi * 8], xmm0 | |
| movhpd [ecx + esi * 8], xmm0 | |
| movlpd [ebx + edx * 8], xmm1 | |
| movhpd [ecx + edx * 8], xmm1 | |
| add edx, edi { i advances by the doubled l held in edi } | |
| cmp edx, fftlength | |
| jl @loo2 | |
| mov esi, fl3 | |
| add tabnr, esi { table index advances by fl3 per m } | |
| add eax, 1 | |
| cmp eax, l // for m:=0 to l-1 | |
| jl @loo1 | |
| shr fl3, 1 { table stride is halved for the next pass } | |
| mov eax, edi // ischritt | |
| mov l, eax | |
| cmp eax, fl2 // while l<=fl2 | |
| jle @loo0 | |
| pop edi | |
| pop esi | |
| pop eax | |
| pop ebx | |
| pop ecx | |
| pop edx | |
| end; | |
| // ... | |
| procedure FirstATlevelCAO(PIA: TPCardinalArray; PsiLight: TPsiLight5; Leng: Integer); { For each of Leng records of 18 bytes starting at PsiLight, stores one dword to consecutive entries of PIA: 0 when the word at record offset 8 is >= $8000, otherwise the dword at record offset 6 with its low byte cleared, shifted right by 1. The code expects eax=PIA, edx=PsiLight, ecx=Leng. } | |
| asm | |
| push esi | |
| dec ecx | |
| js @@out { nothing to do when Leng - 1 is negative } | |
| inc ecx { restore the count } | |
| add edx, 8 { edx -> word at offset 8 of the first record } | |
| @@1: | |
| cmp word [edx], $8000 | |
| jnb @@2 { unsigned >= $8000: store 0 } | |
| mov esi, [edx-2] { dword at record offset 6 } | |
| and esi, $ffffff00 { drop the byte at offset 6 } | |
| shr esi, 1 | |
| jmp @@3 | |
| @@2: | |
| xor esi, esi | |
| @@3: | |
| mov [eax], esi | |
| add edx, 18 { next record } | |
| add eax, 4 { next output dword } | |
| dec ecx | |
| jnz @@1 | |
| @@out: | |
| pop esi | |
| end; | |
| procedure SmoothH(PIA, SA: TPCardinalArray; ya, Step: Integer); { For i = 0..ya: PIA[i] := (((SA[min(i+Step, ya)] + SA[max(i-Step, 0)]) shr 1) + PIA[i]) shr 1, with signed comparisons for the clamping. The code expects eax=PIA, edx=SA, ecx=ya and Step at [ebp+8], and relies on an ebp frame being set up before the asm body. } | |
| asm | |
| add esp, -12 { room for the locals at [ebp-8] and [ebp-12] } | |
| push ebx | |
| push esi | |
| push edi | |
| mov [ebp-8], ecx { [ebp-8] = ya } | |
| mov ebx, edx { ebx = SA } | |
| mov edi, [ebp+8] { edi = Step } | |
| mov edx, ecx | |
| test edx, edx | |
| jl @@2 { ya < 0: nothing to do } | |
| inc edx | |
| mov [ebp-12], edx { [ebp-12] = ya + 1 = remaining iterations } | |
| xor esi, esi { esi = i } | |
| @@1: | |
| mov edx, esi | |
| sub edx, edi { edx = i - Step } | |
| test edx, edx | |
| jnl @@3 | |
| xor edx, edx { clamp the lower index to 0 } | |
| @@3: | |
| mov ecx, edi | |
| add ecx, esi { ecx = i + Step } | |
| cmp ecx, [ebp-8] | |
| jle @@4 | |
| mov ecx, [ebp-8] { clamp the upper index to ya } | |
| @@4: | |
| mov ecx, [ebx+ecx*4] | |
| add ecx, [ebx+edx*4] | |
| shr ecx, 1 { mean of the two SA samples; a carry out of bit 31 is discarded } | |
| add ecx, [eax] | |
| shr ecx, 1 { mean with the current PIA value } | |
| mov [eax], ecx | |
| inc esi | |
| add eax, 4 { next PIA element } | |
| dec dword [ebp-12] | |
| jnz @@1 | |
| @@2: | |
| pop edi | |
| pop esi | |
| pop ebx | |
| add esp, 12 | |
| end; | |
| procedure SmoothV(PIA, SA: TPCardinalArray; ye, Step, wid: Integer); { Same smoothing as SmoothH for i = 0..ye, but the destination address advances by the value at [ebp+8] (added to the byte address unscaled) per step: dest := (((SA[min(i+Step, ye)] + SA[max(i-Step, 0)]) shr 1) + dest) shr 1. The code expects eax=PIA, edx=SA, ecx=ye, Step at [ebp+12] and wid at [ebp+8], and relies on an ebp frame being set up before the asm body. } | |
| asm | |
| add esp, -12 { room for the locals at [ebp-8] and [ebp-12] } | |
| push ebx | |
| push esi | |
| push edi | |
| mov [ebp-8], ecx { [ebp-8] = ye } | |
| mov ebx, edx { ebx = SA } | |
| mov edi, [ebp+12] { edi = Step } | |
| mov edx, ecx | |
| test edx, edx | |
| jl @@2 { ye < 0: nothing to do } | |
| inc edx | |
| mov [ebp-12], edx { [ebp-12] = ye + 1 = remaining iterations } | |
| xor esi, esi { esi = i } | |
| @@1: | |
| mov edx, esi | |
| sub edx, edi { edx = i - Step } | |
| test edx, edx | |
| jnl @@3 | |
| xor edx, edx { clamp the lower index to 0 } | |
| @@3: | |
| mov ecx, edi | |
| add ecx, esi { ecx = i + Step } | |
| cmp ecx, [ebp-8] | |
| jle @@4 | |
| mov ecx, [ebp-8] { clamp the upper index to ye } | |
| @@4: | |
| mov ecx, [ebx+ecx*4] | |
| add ecx, [ebx+edx*4] | |
| shr ecx, 1 { mean of the two SA samples; a carry out of bit 31 is discarded } | |
| add ecx, [eax] | |
| shr ecx, 1 { mean with the current destination value } | |
| mov [eax], ecx | |
| inc esi | |
| add eax, dword [ebp+8] { step the destination by wid bytes } | |
| dec dword [ebp-12] | |
| jnz @@1 | |
| @@2: | |
| pop edi | |
| pop esi | |
| pop ebx | |
| add esp, 12 | |
| end; | |
| procedure MinSI(var SI: SmallInt; var i: Integer); { If SI < i (signed), SI is set to i, saturated at $7FFF; otherwise SI is left unchanged. i is only read. The code expects eax=@SI and edx=@i. } | |
| asm | |
| movsx ecx, word [eax] { ecx = SI, sign-extended } | |
| cmp ecx, [edx] | |
| jnl @@1 { SI >= i: keep SI } | |
| cmp dword [edx], $7FFF | |
| jl @@2 | |
| mov word [eax], $7FFF { i does not fit below the SmallInt maximum: saturate } | |
| ret | |
| @@2: | |
| mov edx, [edx] | |
| mov word [eax], dx { SI := i } | |
| @@1: | |
| end; | |
| function NotOnlyBackGround4(p: Pointer): Integer; { Returns $80000000 when bit 31 is set in all four dwords at p, p+18, p+36 and p+54, otherwise 0. The code expects eax=p. NOTE(review): the name suggests the opposite sense for a non-zero result - confirm against the callers. } | |
| asm | |
| mov edx, [eax] | |
| and edx, [eax + 18] | |
| and edx, [eax + 36] | |
| and edx, [eax + 54] | |
| and edx, $80000000 { keep only the common top bit } | |
| mov eax, edx | |
| end; | |
| procedure MakeZP4(p: Pointer; var zp: array of Integer); { zp[k] := (dword at p + 18*k with its low byte cleared) shr 1, for k = 0..3. The code expects eax=p and edx=address of zp[0]; ecx is used as scratch from the first instruction, so no bound of zp is checked and zp must hold at least four elements. } | |
| asm | |
| mov ecx, [eax] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx], ecx | |
| mov ecx, [eax + 18] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 4], ecx | |
| mov ecx, [eax + 36] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 8], ecx | |
| mov ecx, [eax + 54] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 12], ecx | |
| end; | |
| procedure isMemberQuat(PIteration3D: TPIteration3D); { Excerpt: SSE2 iteration loop on the record at PIteration3D; going by the name and the X1..X4 comments this is a quaternion-style formula. Local declarations such as Rold are elided. On exit the last squared radius is stored at +56 and the iteration count at +64. } | |
| // ... | |
| asm | |
| push esi | |
| push edi | |
| push ecx | |
| mov esi, PIteration3D | |
| xor ecx, ecx { ecx = iteration counter } | |
| mov edi, [esi + 48] { pointer stored at +48; the double at [edi + 160] is the limit tested at the loop end } | |
| @u: movupd xmm0, [esi] // C1, C2 = X1, X2 | |
| movsd xmm1, [esi + 16] // C3, 0 = X3, X4 | |
| movapd xmm2, xmm0 | |
| movapd xmm3, xmm1 | |
| mulpd xmm2, xmm0 // X1*X1, X2*X2 | |
| mulpd xmm3, xmm1 // X3*X3, X4*X4 | |
| movapd xmm4, xmm2 | |
| addpd xmm4, xmm3 // X1*X1 + X3*X3, X2*X2 + X4*X4 | |
| pshufd xmm5, xmm4, $4E // X2*X2 + X4*X4, X1*X1 + X3*X3 | |
| addsd xmm4, xmm5 // Rout | |
| @a: addsd xmm3, xmm5 // X3*X3 + X2*X2 + X4*X4 | |
| movlpd Rold, xmm4 { remember the squared radius from before this step } | |
| pshufd xmm7, xmm0, $4E // X2, X1 | |
| subsd xmm2, xmm3 // X1*X1 - X2*X2 - X3*X3 - X4*X4 | |
| movapd xmm5, xmm0 // X1, X2 | |
| mulsd xmm7, xmm0 // X2*X1 | |
| pshufd xmm6, xmm1, $4E // X4, X3 | |
| addsd xmm2, [esi] | |
| movapd xmm3, xmm6 // X4, X3 | |
| movapd xmm0, xmm2 // X1 = X1*X1 - X2*X2 - X3*X3 - X4*X4 + C1; | |
| mulpd xmm3, xmm5 // X4*X1, X3*X2 | |
| mulsd xmm6, xmm1 // X4*X3 | |
| mulpd xmm5, xmm1 // X1*X3, X2*X4 | |
| addsd xmm7, xmm6 // X2*X1 + X4*X3 | |
| pshufd xmm1, xmm5, $4E // X2*X4, X1*X3 | |
| addsd xmm7, xmm7 // 2 * (X2*X1 + X4*X3) | |
| addsd xmm7, [esi + 8] // X2 = 2 * (X2*X1 + X3*X4) + C2 | |
| subsd xmm5, xmm1 // X2*X4*O1 + X1*X3 sub | |
| shufpd xmm0, xmm7, 0 // X1, X2 | |
| addsd xmm5, xmm5 // 2 * (X2*X4*O1 + X1*X3) | |
| pshufd xmm6, xmm3, $4E // X3*X2, X4*X1 | |
| addsd xmm5, [esi + 16] // X3 = 2 * (X2*X4*O1 + X1*X3) + C3 | |
| addsd xmm6, xmm3 // X3*X2 + X4*X1 | |
| movsd xmm1, xmm5 | |
| addsd xmm6, xmm6 // X4 = 2 * (X4*X1 + X3*X2) | |
| shufpd xmm1, xmm6, 0 // X3, X4 | |
| movapd xmm2, xmm0 | |
| movapd xmm3, xmm1 | |
| mulpd xmm2, xmm0 // X1*X1, X2*X2 | |
| mulpd xmm3, xmm1 // X3*X3, X4*X4 | |
| movapd xmm4, xmm2 | |
| addpd xmm4, xmm3 // X1*X1 + X3*X3, X2*X2 + X4*X4 | |
| pshufd xmm5, xmm4, $4E // same lane swap as before the loop | |
| addsd xmm4, xmm5 // Rout | |
| inc ecx | |
| cmp ecx, [esi + 68] | |
| jge @c { stop once the counter reaches the dword at +68 } | |
| ucomisd xmm4, [edi + 160] //>8? | |
| jb @a { keep iterating while Rout is below the limit } | |
| @c: movlpd [esi + 56], xmm4 // Rout = double | |
| mov [esi + 64], ecx // ItResultI | |
| pop ecx | |
| pop edi | |
| pop esi | |
| end | |
| // ... | |
| procedure UpdateScaledImage(StartYh, EndYh: Integer); { Excerpt: inner loops that shrink rows of 4-byte pixels by averaging neighbouring pixels; the Pascal code between the asm blocks, which sets wid, PB1, PBh, mFSIoffset and the other locals, is elided. } | |
| // ... | |
| asm { 2x2 average: each output pixel gets, for byte channels 0..2, (sum of the 4 source bytes + 2) shr 2. The source pixels are at esi and esi+4 in this row and in the row mFSIoffset bytes further on. Byte 3 of the output pixel is not written. } | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push edi | |
| push esi | |
| mov ecx, wid { ecx = number of output pixels; tested after the body, so it must be >= 1 } | |
| mov esi, PB1 { esi = source cursor } | |
| mov edi, PBh { edi = destination cursor } | |
| mov ebx, mFSIoffset { ebx = byte offset from one source row to the next one used } | |
| @ll: movzx eax, byte ptr [esi] { channel 0 } | |
| movzx edx, byte ptr [esi + 4] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 4] | |
| lea eax, [eax + edx + 2] { + 2 rounds to nearest } | |
| shr eax, 2 | |
| mov [edi], al | |
| movzx eax, byte ptr [esi + 1] { channel 1 } | |
| movzx edx, byte ptr [esi + 5] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 1] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 5] | |
| lea eax, [eax + edx + 2] | |
| shr eax, 2 | |
| mov [edi + 1], al | |
| movzx eax, byte ptr [esi + 2] { channel 2 } | |
| movzx edx, byte ptr [esi + 6] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 2] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 6] | |
| lea eax, [eax + edx + 2] | |
| shr eax, 2 | |
| mov [edi + 2], al | |
| add esi, 8 { two source pixels consumed } | |
| add edi, 4 { one output pixel produced } | |
| dec ecx | |
| jnz @ll | |
| pop esi | |
| pop edi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| asm { 3x3 average: each output pixel gets, for byte channels 0..2, (sum of the 9 source bytes + 4) divided by b. The source pixels are at esi, esi+4 and esi+8 in three rows spaced mFSIoffset bytes apart. b is declared in the elided code - presumably a byte holding 9, TODO confirm. Byte 3 of the output pixel is not written. } | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push edi | |
| push esi | |
| mov ecx, wid { ecx = number of output pixels; tested after the body, so it must be >= 1 } | |
| mov esi, PB1 { esi = source cursor } | |
| mov edi, PBh { edi = destination cursor } | |
| mov ebx, mFSIoffset { ebx = byte offset between source rows } | |
| @ll: movzx eax, byte ptr [esi] { channel 0 } | |
| movzx edx, byte ptr [esi + 4] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + 8] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 4] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 8] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 4] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 8] | |
| lea eax, [eax + edx + 4] { + 4 for rounding } | |
| div b { NOTE(review): edx still holds the last sample here, so this is only correct if b is byte sized and the divide uses ax - confirm the declaration of b } | |
| mov [edi], al | |
| movzx eax, byte ptr [esi + 1] { channel 1 } | |
| movzx edx, byte ptr [esi + 5] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + 9] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 1] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 5] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 9] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 1] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 5] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 9] | |
| lea eax, [eax + edx + 4] | |
| div b | |
| mov [edi + 1], al | |
| movzx eax, byte ptr [esi + 2] { channel 2 } | |
| movzx edx, byte ptr [esi + 6] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + 10] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 2] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 6] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx + 10] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 2] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 6] | |
| add eax, edx | |
| movzx edx, byte ptr [esi + ebx * 2 + 10] | |
| lea eax, [eax + edx + 4] | |
| div b | |
| mov [edi + 2], al | |
| add esi, 12 { three source pixels consumed } | |
| add edi, 4 { one output pixel produced } | |
| dec ecx | |
| jnz @ll | |
| pop esi | |
| pop edi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| asm //sum rows to buf: for source rows ImageScale-1 down to 0 and for each of wid output pixels, adds ImageScale horizontally adjacent bytes per channel and appends the three dword sums to the buffer at PC1 | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push edi | |
| push esi | |
| mov ebx, ImageScale | |
| dec ebx { ebx = ImageScale - 1 = additions per channel; the inner loops test after the body, so this must be >= 1 } | |
| mov y2, ebx { y2 = row index, counted down to 0 } | |
| mov edi, PC1 { edi = output cursor, 12 bytes per output pixel } | |
| lea eax, ebx * 4 - 1 | |
| mov itmp, eax { itmp = (ImageScale - 1) * 4 - 1: rewinds esi to the first pixel of the group, one byte further on } | |
| @@0: mov ecx, wid | |
| mov w2, ecx { w2 = output pixels left in this row } | |
| mov esi, PB1 | |
| mov eax, mFSIoffset | |
| mul y2 { eax = mFSIoffset * y2; edx is overwritten } | |
| add esi, eax { esi = start of source row y2 } | |
| @ll: mov ecx, ebx | |
| movzx eax, byte ptr [esi] { channel 0 of the first pixel in the group } | |
| @@1: add esi, 4 | |
| movzx edx, byte ptr [esi] | |
| add eax, edx | |
| dec ecx | |
| jnz @@1 | |
| mov [edi], eax | |
| sub esi, itmp { back to the first pixel, channel 1 } | |
| mov ecx, ebx | |
| movzx eax, byte ptr [esi] | |
| @@2: add esi, 4 | |
| movzx edx, byte ptr [esi] | |
| add eax, edx | |
| dec ecx | |
| jnz @@2 | |
| mov [edi + 4], eax | |
| sub esi, itmp { back to the first pixel, channel 2 } | |
| mov ecx, ebx | |
| movzx eax, byte ptr [esi] | |
| @@3: add esi, 4 | |
| movzx edx, byte ptr [esi] | |
| add eax, edx | |
| dec ecx | |
| jnz @@3 | |
| mov [edi + 8], eax | |
| add edi, 12 | |
| add esi, 2 { from channel 2 of the last pixel to channel 0 of the next group } | |
| dec w2 | |
| jnz @ll | |
| dec y2 | |
| jns @@0 { also process row 0 } | |
| pop esi | |
| pop edi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| asm //sum columns: for each of wid output pixels and each channel, adds the ImageScale dword sums that lie wid*12 bytes apart in the buffer at PC1, adds a, divides by b and stores the low byte of the quotient at PBh | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push edi | |
| push esi | |
| mov eax, ImageScale | |
| dec eax | |
| mov y2, eax { y2 = ImageScale - 1 = additions per channel; the inner loops test after the body, so this must be >= 1 } | |
| mov edx, PC1 { edx = cursor over the dword sums } | |
| mov ebx, wid | |
| mov w2, ebx { w2 = output pixels left } | |
| shl ebx, 2 | |
| lea ebx, ebx * 2 + ebx { ebx = wid * 12 = bytes per buffered row } | |
| mov edi, PBh { edi = destination cursor } | |
| @ll: mov ecx, y2 | |
| mov esi, edx | |
| mov eax, [esi] | |
| @@1: add esi, ebx | |
| add eax, [esi] | |
| dec ecx | |
| jnz @@1 | |
| add eax, a { a and b come from the elided code - presumably rounding offset and divisor, TODO confirm } | |
| div b { NOTE(review): edx holds the buffer cursor here and is used again below, so this is only correct if b is byte sized and the divide uses ax - confirm the declaration of b } | |
| mov [edi], al | |
| add edx, 4 | |
| mov ecx, y2 | |
| mov esi, edx | |
| mov eax, [esi] | |
| @@2: add esi, ebx | |
| add eax, [esi] | |
| dec ecx | |
| jnz @@2 | |
| add eax, a | |
| div b | |
| mov [edi + 1], al | |
| add edx, 4 | |
| mov ecx, y2 | |
| mov esi, edx | |
| mov eax, [esi] | |
| @@3: add esi, ebx | |
| add eax, [esi] | |
| dec ecx | |
| jnz @@3 | |
| add eax, a | |
| div b | |
| mov [edi + 2], al | |
| add edx, 4 | |
| add edi, 4 { one output pixel produced; its byte 3 is not written } | |
| dec w2 | |
| jnz @ll | |
| pop esi | |
| pop edi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // ... | |
| function ColToSVecFlipRBc4(c: T4Cardinal): T4SVec; | |
| // Converts four colours into four Single vectors, reversing the order of the first three byte channels. | |
| // Input: eax points at four dwords; each one is dereferenced as the address of a colour. | |
| // Output: edx points at the result, written as four 16-byte vectors (vector k belongs to colour k): | |
| // component 0 = byte 2, component 1 = byte 1, component 2 = byte 0, component 3 = 0. | |
| // The values are stored unscaled (0..255). | |
| // Each channel is loaded with movzx from a single byte. The earlier dword load at offset +1 or +2 | |
| // followed by "and $FF" produced the same value but read up to two bytes past the 4-byte colour. | |
| asm | |
| push ebx | |
| push esi | |
| push edi | |
| add esp, -16 // scratch for four dwords, needed because fild only takes memory operands | |
| // channel at byte 2 -> component 0 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx + 2] //dereference | |
| movzx ecx, byte ptr [ecx + 2] | |
| movzx esi, byte ptr [esi + 2] | |
| movzx edi, byte ptr [edi + 2] | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] // st0 = colour 3 ... st3 = colour 0 | |
| fstp dword [edx + 48] | |
| fstp dword [edx + 32] | |
| fstp dword [edx + 16] | |
| fstp dword [edx] | |
| // channel at byte 1 -> component 1 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx + 1] //dereference | |
| movzx ecx, byte ptr [ecx + 1] | |
| movzx esi, byte ptr [esi + 1] | |
| movzx edi, byte ptr [edi + 1] | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| fstp dword [edx + 52] | |
| fstp dword [edx + 36] | |
| fstp dword [edx + 20] | |
| fstp dword [edx + 4] | |
| // channel at byte 0 -> component 2; component 3 is cleared | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx] //dereference | |
| movzx ecx, byte ptr [ecx] | |
| movzx esi, byte ptr [esi] | |
| movzx edi, byte ptr [edi] | |
| xor eax, eax // eax is no longer needed as the input pointer; 0 has the bit pattern of Single 0.0 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| mov [edx + 12], eax | |
| mov [edx + 28], eax | |
| mov [edx + 44], eax | |
| mov [edx + 60], eax | |
| fstp dword [edx + 56] | |
| fstp dword [edx + 40] | |
| fstp dword [edx + 24] | |
| fstp dword [edx + 8] | |
| add esp, 16 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; | |
| procedure ColToSVecSSE2(c: T4Cardinal; sv, svout: TPSVec); { Weighted sum of four colours. c (eax) holds four pointers, each dereferenced to one colour dword; sv (edx) supplies four singles used as per-colour weights; 16 bytes are written at svout (ecx) as [S2, S1, S0, S0], where Sn = sum over k of (byte n of colour k) * sv[k] * s1d255. This assumes sva1 masks the low byte of every dword - TODO confirm. } | |
| asm //CVTDQ2PS: sse2 - 4 ints to 4 singles | |
| MOVDQU xmm5, [edx] //xmm5 = the 4 singles at sv; edx is scratch from here on. Note on PSRLDQ (sse2) xmm1, imm8: shifts xmm1 right by imm8 bytes while shifting in 0s. | |
| add esp, -16 { 16-byte scratch for the gathered colours } | |
| mov edx, [eax] | |
| mov edx, [edx] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov eax, [eax + 12] | |
| mov edx, [edx] | |
| mov eax, [eax] | |
| mov [esp + 8], edx | |
| mov [esp + 12], eax | |
| movss xmm0, s1d255 { scale factor, declared elsewhere } | |
| MOVDQU xmm1, [esp] //[eax] 4 cardinal colors | |
| MOVDQU xmm4, sva1 | |
| MOVDQA xmm2, xmm1 //todo: use input pointers, load vals before | |
| MOVDQA xmm3, xmm1 | |
| PSRLDQ xmm2, 1 //green | |
| PSRLDQ xmm3, 2 //blue | |
| shufps xmm0, xmm0, 0 { broadcast the scale to all four lanes } | |
| andps xmm1, xmm4 //red or $FF000000FF000000FF000000FF | |
| andps xmm2, xmm4 | |
| andps xmm3, xmm4 | |
| mulps xmm5, xmm0 { weights * scale } | |
| CVTDQ2PS xmm1, xmm1 | |
| CVTDQ2PS xmm2, xmm2 | |
| CVTDQ2PS xmm3, xmm3 | |
| mulps xmm1, xmm5 | |
| mulps xmm2, xmm5 | |
| mulps xmm3, xmm5 | |
| MOVLHPS xmm4, xmm1 //HADDD L1,.. (H,L) | |
| movhlps xmm4, xmm3 //L1,H3 | |
| shufps xmm3, xmm1, $E4 //H1,L3 | |
| MOVHLPS xmm0, xmm2 //..,H2 | |
| addps xmm4, xmm3 //11,33 | |
| addps xmm0, xmm2 //..,22 | |
| pshufd xmm5, xmm4, $B1 //can't copy 1 dw to more than 1 dest! | |
| pshufd xmm2, xmm0, $B1 | |
| addps xmm5, xmm4 //3,1 | |
| addss xmm2, xmm0 //.,2 | |
| movups [ecx], xmm5 // r,.,b | |
| movss [ecx + 4], xmm2 // .,g,. | |
| add esp, 16 | |
| end; | |
| procedure ColToSVecSqrSSE2(c: T4Cardinal; sv, svout: TPSVec); //svout := sumof([0..3] cardinal colors * sv[0..3]), with every channel byte squared first and the weights scaled by scmul; 16 bytes are written as [S2, S1, S0, S0] where n is the byte index inside the colour (assuming sva1 masks the low byte of every dword - TODO confirm) | |
| const scmul: Single = 1 / 65025; | |
| asm // eax edx ecx | |
| MOVDQU xmm5, [edx] { xmm5 = the four singles at sv; edx is scratch from here on } | |
| add esp, -16 { 16-byte scratch for the gathered colours } | |
| mov edx, [eax] | |
| mov edx, [edx] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov eax, [eax + 12] | |
| mov edx, [edx] | |
| mov eax, [eax] | |
| mov [esp + 8], edx | |
| mov [esp + 12], eax | |
| movss xmm0, scmul | |
| MOVDQU xmm1, [esp] { the four colour dwords } | |
| MOVDQU xmm4, sva1 | |
| MOVDQA xmm2, xmm1 | |
| MOVDQA xmm3, xmm1 | |
| PSRLDQ xmm2, 1 { byte 1 of each colour moves to the low byte of its lane } | |
| PSRLDQ xmm3, 2 { byte 2 of each colour moves to the low byte of its lane } | |
| shufps xmm0, xmm0, 0 { broadcast the scale to all four lanes } | |
| andps xmm1, xmm4 | |
| andps xmm2, xmm4 | |
| andps xmm3, xmm4 | |
| mulps xmm5, xmm0 { weights * scale } | |
| CVTDQ2PS xmm1, xmm1 | |
| CVTDQ2PS xmm2, xmm2 | |
| CVTDQ2PS xmm3, xmm3 | |
| mulps xmm1, xmm1 { square the channel values } | |
| mulps xmm2, xmm2 | |
| mulps xmm3, xmm3 | |
| mulps xmm1, xmm5 | |
| mulps xmm2, xmm5 | |
| mulps xmm3, xmm5 | |
| MOVLHPS xmm4, xmm1 //HADDD L1,.. (H,L) | |
| movhlps xmm4, xmm3 //L1,H3 | |
| shufps xmm3, xmm1, $E4 //H1,L3 | |
| MOVHLPS xmm0, xmm2 //..,H2 | |
| addps xmm4, xmm3 //11,33 | |
| addps xmm0, xmm2 //..,22 | |
| pshufd xmm5, xmm4, $B1 //can't copy 1 dw to more than 1 dest! | |
| pshufd xmm2, xmm0, $B1 | |
| addps xmm5, xmm4 //3,1 | |
| addss xmm2, xmm0 //.,2 | |
| movups [ecx], xmm5 // r,.,b | |
| movss [ecx + 4], xmm2 // .,g,. | |
| add esp, 16 | |
| end; | |
| procedure ColToSVecSqrSSE2_16(c: T4Cardinal; sv, svout: TPSVec); //svout := sumof([0..3] cardinal colors * sv[0..3]) | |
| // Variant for colours with 16-bit channels, each channel value squared before weighting. | |
| // c (eax): four pointers, each dereferenced to one colour; the dwords at colour+0 and colour+4 are read. | |
| // sv (edx): four singles used as per-colour weights, scaled by csmul. | |
| // svout (ecx): 16 bytes are written as [S2, S1, S0, S0], where Sn is the weighted sum of the squared | |
| // 16-bit word n of each colour (assuming sva16 masks the low word of every dword - TODO confirm). | |
| // A second, dead copy of "MOVDQA xmm2, xmm1" that used to follow the load of sva16 has been removed; | |
| // xmm2 is not read and xmm1 is not changed before the copy that remains further down. | |
| const csmul: Single = 1 {255.0} / (65535.0 * 65535.0); | |
| asm | |
| MOVDQU xmm5, [edx] // xmm5 = the four singles at sv; edx is scratch from here on | |
| add esp, -16 // 16-byte scratch for the gathered dwords | |
| mov edx, [eax] // gather the dword at offset 0 of each colour | |
| mov edx, [edx] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov edx, [edx] | |
| mov [esp + 8], edx | |
| mov edx, [eax + 12] | |
| mov edx, [edx] | |
| mov [esp + 12], edx | |
| movss xmm0, csmul | |
| MOVDQU xmm1, [esp] // words 0 and 1 of the four colours | |
| MOVDQU xmm4, sva16 | |
| mov edx, [eax] // gather the dword at offset 4 of each colour | |
| mov edx, [edx + 4] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx + 4] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov eax, [eax + 12] | |
| mov edx, [edx + 4] | |
| mov eax, [eax + 4] | |
| mov [esp + 8], edx | |
| mov [esp + 12], eax | |
| MOVDQA xmm2, xmm1 | |
| MOVDQU xmm3, [esp] // word 2 of the four colours | |
| PSRLDQ xmm2, 2 // word 1 moves to the low word of each lane | |
| shufps xmm0, xmm0, 0 // broadcast the scale to all four lanes | |
| andps xmm1, xmm4 | |
| andps xmm2, xmm4 | |
| andps xmm3, xmm4 | |
| mulps xmm5, xmm0 // weights * scale | |
| CVTDQ2PS xmm1, xmm1 | |
| CVTDQ2PS xmm2, xmm2 | |
| CVTDQ2PS xmm3, xmm3 | |
| mulps xmm1, xmm1 // square the channel values | |
| mulps xmm2, xmm2 | |
| mulps xmm3, xmm3 | |
| mulps xmm1, xmm5 | |
| mulps xmm2, xmm5 | |
| mulps xmm3, xmm5 | |
| MOVLHPS xmm4, xmm1 // horizontal sums: xmm4 = [w2 of colours 2,3 ; w0 of colours 0,1] | |
| movhlps xmm4, xmm3 | |
| shufps xmm3, xmm1, $E4 // xmm3 = [w2 of colours 0,1 ; w0 of colours 2,3] | |
| MOVHLPS xmm0, xmm2 | |
| addps xmm4, xmm3 | |
| addps xmm0, xmm2 | |
| pshufd xmm5, xmm4, $B1 | |
| pshufd xmm2, xmm0, $B1 | |
| addps xmm5, xmm4 | |
| addss xmm2, xmm0 | |
| movups [ecx], xmm5 // r,.,b | |
| movss [ecx + 4], xmm2 // .,g,. | |
| add esp, 16 | |
| end; | |
| procedure ColToSVecSSE2_16(c: T4Cardinal; sv, svout: TPSVec); //svout := sumof([0..3] cardinal colors * sv[0..3]) | |
| // Variant for colours with 16-bit channels. | |
| // c (eax): four pointers, each dereferenced to one colour; the dwords at colour+0 and colour+4 are read. | |
| // sv (edx): four singles used as per-colour weights, scaled by csmul. | |
| // svout (ecx): 16 bytes are written as [S2, S1, S0, S0], where Sn is the weighted sum of the | |
| // 16-bit word n of each colour (assuming sva16 masks the low word of every dword - TODO confirm). | |
| // A second, dead copy of "MOVDQA xmm2, xmm1" that used to follow the load of sva16 has been removed; | |
| // xmm2 is not read and xmm1 is not changed before the copy that remains further down. | |
| const csmul: Single = 1 {255.0} / 65535.0; | |
| asm | |
| MOVDQU xmm5, [edx] // xmm5 = the four singles at sv; edx is scratch from here on | |
| add esp, -16 // 16-byte scratch for the gathered dwords | |
| mov edx, [eax] // gather the dword at offset 0 of each colour | |
| mov edx, [edx] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov edx, [edx] | |
| mov [esp + 8], edx | |
| mov edx, [eax + 12] | |
| mov edx, [edx] | |
| mov [esp + 12], edx | |
| movss xmm0, csmul | |
| MOVDQU xmm1, [esp] // words 0 and 1 of the four colours | |
| MOVDQU xmm4, sva16 | |
| mov edx, [eax] // gather the dword at offset 4 of each colour | |
| mov edx, [edx + 4] | |
| mov [esp], edx | |
| mov edx, [eax + 4] | |
| mov edx, [edx + 4] | |
| mov [esp + 4], edx | |
| mov edx, [eax + 8] | |
| mov eax, [eax + 12] | |
| mov edx, [edx + 4] | |
| mov eax, [eax + 4] | |
| mov [esp + 8], edx | |
| mov [esp + 12], eax | |
| MOVDQA xmm2, xmm1 | |
| MOVDQU xmm3, [esp] // word 2 of the four colours | |
| PSRLDQ xmm2, 2 // word 1 moves to the low word of each lane | |
| shufps xmm0, xmm0, 0 // broadcast the scale to all four lanes | |
| andps xmm1, xmm4 | |
| andps xmm2, xmm4 | |
| andps xmm3, xmm4 | |
| mulps xmm5, xmm0 // weights * scale | |
| CVTDQ2PS xmm1, xmm1 | |
| CVTDQ2PS xmm2, xmm2 | |
| CVTDQ2PS xmm3, xmm3 | |
| mulps xmm1, xmm5 | |
| mulps xmm2, xmm5 | |
| mulps xmm3, xmm5 | |
| MOVLHPS xmm4, xmm1 // horizontal sums: xmm4 = [w2 of colours 2,3 ; w0 of colours 0,1] | |
| movhlps xmm4, xmm3 | |
| shufps xmm3, xmm1, $E4 // xmm3 = [w2 of colours 0,1 ; w0 of colours 2,3] | |
| MOVHLPS xmm0, xmm2 | |
| addps xmm4, xmm3 | |
| addps xmm0, xmm2 | |
| pshufd xmm5, xmm4, $B1 | |
| pshufd xmm2, xmm0, $B1 | |
| addps xmm5, xmm4 | |
| addss xmm2, xmm0 | |
| movups [ecx], xmm5 // r,.,b | |
| movss [ecx + 4], xmm2 // .,g,. | |
| add esp, 16 | |
| end; | |
| function ColToSVecFlipRBc4sqr(c: T4Cardinal): T4SVec; | |
| // Converts four colours into four Single vectors, storing v * v * s1d255 for every byte channel v | |
| // and reversing the order of the first three channels. | |
| // Input: eax points at four dwords; each one is dereferenced as the address of a colour. | |
| // Output: edx points at the result, written as four 16-byte vectors (vector k belongs to colour k): | |
| // component 0 from byte 2, component 1 from byte 1, component 2 from byte 0, component 3 = 0. | |
| // s1d255 is a constant declared elsewhere; it is loaded once and stays on the FPU stack until the | |
| // final fmulp consumes it. | |
| // Each channel is loaded with movzx from a single byte. The earlier dword load at offset +1 or +2 | |
| // followed by "and $FF" produced the same value but read up to two bytes past the 4-byte colour. | |
| asm | |
| push ebx | |
| push esi | |
| push edi | |
| add esp, -16 // scratch for four dwords, needed because fild only takes memory operands | |
| // channel at byte 2 -> component 0 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx + 2] //dereference | |
| movzx ecx, byte ptr [ecx + 2] | |
| movzx esi, byte ptr [esi + 2] | |
| movzx edi, byte ptr [edi + 2] | |
| fld s1d255 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] // st0 = colour 3 ... st3 = colour 0, st4 = scale | |
| fmul st, st(0) | |
| fmul st, st(4) | |
| fstp dword [edx + 48] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 32] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 16] | |
| fmul st, st(0) | |
| fmul st, st(1) | |
| fstp dword [edx] | |
| // channel at byte 1 -> component 1 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx + 1] //dereference | |
| movzx ecx, byte ptr [ecx + 1] | |
| movzx esi, byte ptr [esi + 1] | |
| movzx edi, byte ptr [edi + 1] | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| fmul st, st(0) | |
| fmul st, st(4) | |
| fstp dword [edx + 52] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 36] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 20] | |
| fmul st, st(0) | |
| fmul st, st(1) | |
| fstp dword [edx + 4] | |
| // channel at byte 0 -> component 2; component 3 is cleared | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, byte ptr [ebx] //dereference | |
| movzx ecx, byte ptr [ecx] | |
| movzx esi, byte ptr [esi] | |
| movzx edi, byte ptr [edi] | |
| xor eax, eax // eax is no longer needed as the input pointer; 0 has the bit pattern of Single 0.0 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| mov [edx + 12], eax | |
| fmul st, st(0) | |
| mov [edx + 28], eax | |
| mov [edx + 44], eax | |
| fmul st, st(4) | |
| mov [edx + 60], eax | |
| fstp dword [edx + 56] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 40] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 24] | |
| fmul st, st(0) | |
| fmulp // multiplies by the scale and pops it, leaving the FPU stack balanced | |
| fstp dword [edx + 8] | |
| add esp, 16 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; | |
| function ColToSVecFlipRBc4sqr16(c: T4Cardinal): T4SVec; | |
| // 16-bit-channel variant: converts four colours into four Single vectors, storing v * v * cdmul for | |
| // every 16-bit channel v and reversing the order of the first three channels. | |
| // Input: eax points at four dwords; each one is dereferenced as the address of a colour. | |
| // Output: edx points at the result, written as four 16-byte vectors (vector k belongs to colour k): | |
| // component 0 from the word at +4, component 1 from the word at +2, component 2 from the word at +0, | |
| // component 3 = 0. | |
| // Each channel is loaded with movzx from a single word. The earlier dword load followed by | |
| // "and $FFFF" produced the same value but read two bytes beyond the channel (up to offset +7). | |
| const cdmul: Double = 255.0 / (65535.0 * 65535.0); | |
| asm | |
| push ebx | |
| push esi | |
| push edi | |
| add esp, -16 // scratch for four dwords, needed because fild only takes memory operands | |
| // channel word at +4 -> component 0 | |
| mov ebx, [eax] //pointers | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx + 4] //dereference | |
| movzx ecx, word ptr [ecx + 4] | |
| movzx esi, word ptr [esi + 4] | |
| movzx edi, word ptr [edi + 4] | |
| fld cdmul | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] // st0 = colour 3 ... st3 = colour 0, st4 = scale | |
| fmul st, st(0) | |
| fmul st, st(4) | |
| fstp dword [edx + 48] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 32] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 16] | |
| fmul st, st(0) | |
| fmul st, st(1) | |
| fstp dword [edx] | |
| // channel word at +2 -> component 1 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx + 2] //dereference | |
| movzx ecx, word ptr [ecx + 2] | |
| movzx esi, word ptr [esi + 2] | |
| movzx edi, word ptr [edi + 2] | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] //loads signed integer, therefore 16 bit direct iload would fail | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| fmul st, st(0) | |
| fmul st, st(4) | |
| fstp dword [edx + 52] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 36] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 20] | |
| fmul st, st(0) | |
| fmul st, st(1) | |
| fstp dword [edx + 4] | |
| // channel word at +0 -> component 2; component 3 is cleared | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx] //dereference | |
| movzx ecx, word ptr [ecx] | |
| movzx esi, word ptr [esi] | |
| movzx edi, word ptr [edi] | |
| xor eax, eax // eax is no longer needed as the input pointer; 0 has the bit pattern of Single 0.0 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| mov [edx + 12], eax | |
| mov [edx + 28], eax | |
| fmul st, st(0) | |
| mov [edx + 44], eax | |
| fmul st, st(4) | |
| mov [edx + 60], eax | |
| fstp dword [edx + 56] | |
| fmul st, st(0) | |
| fmul st, st(3) | |
| fstp dword [edx + 40] | |
| fmul st, st(0) | |
| fmul st, st(2) | |
| fstp dword [edx + 24] | |
| fmul st, st(0) | |
| fmulp // multiplies by the scale and pops it, leaving the FPU stack balanced | |
| fstp dword [edx + 8] | |
| add esp, 16 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; | |
| function ColToSVecFlipRBc416(c: T4Cardinal): T4SVec; | |
| // 16-bit-channel variant: converts four colours into four Single vectors, storing v * d1d256 for | |
| // every 16-bit channel v and reversing the order of the first three channels. | |
| // Input: eax points at four dwords; each one is dereferenced as the address of a colour. | |
| // Output: edx points at the result, written as four 16-byte vectors (vector k belongs to colour k): | |
| // component 0 from the word at +4, component 1 from the word at +2, component 2 from the word at +0, | |
| // component 3 = 0. | |
| // d1d256 is a constant declared elsewhere; it is loaded once and stays on the FPU stack until the | |
| // final fmulp consumes it. | |
| // Each channel is loaded with movzx from a single word. The earlier dword load followed by | |
| // "and $FFFF" produced the same value but read two bytes beyond the channel (up to offset +7). | |
| asm | |
| push ebx | |
| push esi | |
| push edi | |
| add esp, -16 // scratch for four dwords, needed because fild only takes memory operands | |
| // channel word at +4 -> component 0 | |
| mov ebx, [eax] //pointers | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx + 4] //dereference | |
| movzx ecx, word ptr [ecx + 4] | |
| movzx esi, word ptr [esi + 4] | |
| movzx edi, word ptr [edi + 4] | |
| fld d1d256 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] // st0 = colour 3 ... st3 = colour 0, st4 = scale | |
| fmul st, st(4) | |
| fstp dword [edx + 48] | |
| fmul st, st(3) | |
| fstp dword [edx + 32] | |
| fmul st, st(2) | |
| fstp dword [edx + 16] | |
| fmul st, st(1) | |
| fstp dword [edx] | |
| // channel word at +2 -> component 1 | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx + 2] //dereference | |
| movzx ecx, word ptr [ecx + 2] | |
| movzx esi, word ptr [esi + 2] | |
| movzx edi, word ptr [edi + 2] | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] //loads signed integer, therefore 16 bit direct iload would fail | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| fmul st, st(4) | |
| fstp dword [edx + 52] | |
| fmul st, st(3) | |
| fstp dword [edx + 36] | |
| fmul st, st(2) | |
| fstp dword [edx + 20] | |
| fmul st, st(1) | |
| fstp dword [edx + 4] | |
| // channel word at +0 -> component 2; component 3 is cleared | |
| mov ebx, [eax] | |
| mov ecx, [eax + 4] | |
| mov esi, [eax + 8] | |
| mov edi, [eax + 12] | |
| movzx ebx, word ptr [ebx] //dereference | |
| movzx ecx, word ptr [ecx] | |
| movzx esi, word ptr [esi] | |
| movzx edi, word ptr [edi] | |
| xor eax, eax // eax is no longer needed as the input pointer; 0 has the bit pattern of Single 0.0 | |
| mov [esp], ebx | |
| mov [esp + 4], ecx | |
| mov [esp + 8], esi | |
| mov [esp + 12], edi | |
| fild dword [esp] | |
| fild dword [esp + 4] | |
| fild dword [esp + 8] | |
| fild dword [esp + 12] | |
| fmul st, st(4) | |
| mov [edx + 12], eax | |
| mov [edx + 28], eax | |
| mov [edx + 44], eax | |
| mov [edx + 60], eax | |
| fstp dword [edx + 56] | |
| fmul st, st(3) | |
| fstp dword [edx + 40] | |
| fmul st, st(2) | |
| fstp dword [edx + 24] | |
| fmulp // multiplies by the scale and pops it, leaving the FPU stack balanced | |
| fstp dword [edx + 8] | |
| add esp, 16 | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; | |
| procedure HybridCustomIFStest; { Register-level routine without Pascal parameters: esi and edi are used as pointers that the caller must have loaded. It forms x*[edi-32] + y*[edi-24] + z*[edi-16] - [edi-40] from the doubles x, y, z at [esi-120], [esi-112], [esi-104] and stores it at [esi-32]; when the dword at [edi-52] is non-zero it also computes two projected coordinates, calls the routine at [esi+268] and stores a value at [esi+128]. Presumably esi points into the iteration record and edi at the formula variables - TODO confirm. } | |
| asm | |
| movupd xmm0, [esi - 120] //x,y | |
| movsd xmm1, [esi - 104] //z | |
| mulpd xmm0, [edi - 32] { an SSE memory operand: [edi - 32] must be 16-byte aligned } | |
| mulsd xmm1, [edi - 16] | |
| addsd xmm1, xmm0 | |
| unpckhpd xmm0, xmm0 | |
| addsd xmm1, xmm0 { xmm1 = sum of the three products } | |
| subsd xmm1, [edi - 40] | |
| cmp [edi - 68], 0 { NOTE(review): no operand size is given here - confirm which width the assembler uses } | |
| jne @up | |
| andpd xmm1, [edi] { mask with the 16 bytes at edi - presumably an absolute-value mask, TODO confirm } | |
| @up: movsd [esi - 32], xmm1 //Rout: Double; //+56 | |
| mov edx, [edi - 52] | |
| test edx, edx | |
| jz @out { no further work when the dword at [edi - 52] is zero } | |
| push ecx //otrap coloring | |
| add esp, -32 { scratch for four doubles } | |
| fld qword [edi - 16] | |
| fld qword [edi - 24] | |
| fld qword [edi - 32] //nx,ny,nz | |
| fld st //makeorthovecs | |
| fabs | |
| fcomp s011 | |
| fnstsw ax | |
| and ah, 41H { C0 or C3 set: abs(nx) <= s011, or unordered } | |
| jnz @@1 | |
| fld st(2) | |
| fmul st, st | |
| fld st(1) | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| fld1 | |
| fdivrp //1/Sqrt(rr) | |
| fldz | |
| fld st(4) | |
| fmul st, st(2) | |
| fld st(3) | |
| fchs | |
| fmulp st(2), st //vo[0],0,vo[2],nx,ny,nz | |
| jmp @@2 | |
| @@1: | |
| fld st(2) | |
| fmul st, st | |
| fld st(2) | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| fld1 | |
| fdivrp //1/Sqrt(rr) | |
| fld st(3) | |
| fchs | |
| fmul st, st(1) | |
| fld st(3) | |
| fmulp st(2), st //0,vo[1],vo[2],nx,ny,nz | |
| fldz | |
| @@2: | |
| fld st | |
| fmul qword [esi - 120] //x | |
| fld st(2) | |
| fmul qword [esi - 112] //y | |
| faddp | |
| fld st(3) | |
| fmul qword [esi - 104] //z | |
| faddp | |
| fmul qword [edi - 48] | |
| fstp qword [esp] { first projected coordinate } | |
| fld st(5) | |
| fmul st, st(2) | |
| fld st(5) | |
| fmul st, st(4) | |
| fsubrp //r0,vo[0],vo[1],vo[2],nx,ny,nz | |
| fxch | |
| fmul st(6), st //vo[0],r0,vo[1],vo[2],nx,ny,nz*vo[0] | |
| fxch st(4) | |
| fmul st(3), st //nx,r0,vo[1],vo[2]*nx,vo[0],ny,nz*vo[0] | |
| fmulp st(2), st //r0, vo[1]*nx, vo[2]*nx, vo[0], ny, nz*vo[0] | |
| fxch st(4) //ny, vo[1]*nx, vo[2]*nx, vo[0], r0, nz*vo[0] | |
| fmulp st(3), st //vo[1]*nx, vo[2]*nx, vo[0]*ny, r0, nz*vo[0] | |
| fsubrp st(2), st //vo[2]*nx, vo[1]*nx-vo[0]*ny=r2, r0, nz*vo[0] | |
| fsubp st(3), st //r2, r0, nz*vo[0] - vo[2]*nx = r1 | |
| fmul qword [esi - 104] //z | |
| fxch | |
| fmul qword [esi - 120] //x | |
| faddp | |
| fxch | |
| fmul qword [esi - 112] //y | |
| faddp | |
| fmul qword [edi - 48] | |
| fstp qword [esp + 8] { second projected coordinate; the FPU stack is empty again } | |
| mov eax, esp | |
| mov ecx, esp { eax and ecx both point at the scratch area; edx still holds the dword from [edi - 52] } | |
| call [esi + 268] //+356 - 88 = 268 | |
| mov ecx, [edi - 56] | |
| and ecx, 3 { index 0..3 into the four scratch doubles } | |
| fld qword [esp + ecx * 8] //col of map | |
| fadd dword [edi - 60] | |
| fmul dword [edi - 64] | |
| fstp qword [esi + 128] | |
| add esp, 32 | |
| pop ecx | |
| @out: | |
| end; | |
| procedure ipow2(var x, y: Double); //x:=x*x-y*y y:=2xy, both computed from the incoming values; the code expects eax=@x and edx=@y | |
| asm | |
| fld qword [eax] | |
| fld qword [edx] { stack: y,x } | |
| fld st(0) //y,y,x | |
| fmul st(0), st(2) //y*x,y,x | |
| fadd st(0), st(0) { 2*x*y } | |
| fstp qword [edx] { y := 2*x*y; stack: y,x } | |
| fmul st(0), st(0) //y*y,x | |
| fxch | |
| fmul st(0), st(0) //x*x,y*y | |
| fsubrp st(1), st(0) { x*x - y*y, one value left on the stack } | |
| fstp qword [eax] { x := x*x - y*y; the FPU stack is empty again } | |
| end; | |
| procedure ComplexSqr(var xy: TComplex); //x:=x*x-y*y y:=2xy, with x the double at offset 0 and y the double at offset 8 of xy; the code expects eax=@xy | |
| asm | |
| fld qword [eax] | |
| fld qword [eax + 8] //y,x | |
| fld st(0) //y,y,x | |
| fmul st(0), st(2) //y*x,y,x | |
| fadd st(0), st(0) { 2*x*y } | |
| fstp qword [eax + 8] { y := 2*x*y; stack: y,x } | |
| fmul st(0), st(0) //y*y,x | |
| fxch | |
| fmul st(0), st(0) //x*x,y*y | |
| fsubrp st(1), st(0) { x*x - y*y, one value left on the stack } | |
| fstp qword [eax] { x := x*x - y*y; the FPU stack is empty again } | |
| end; | |
| // doInterpolHybridSSE2: iteration loop that blends two hybrid formulas. | |
| // On entry eax = PIteration3D. Every step runs fHybrid[0] and fHybrid[1] from the | |
| // same start vector, then combines both results using the two single-precision | |
| // weights stored at [PIteration3D + 76] (widened to doubles s1,s2 in xmm7). | |
| // The loop ends when ItResultI reaches maxIt or Rout is no longer below RStop. | |
| // If the CalcSIT byte is non-zero, CalcSmoothIterations(PIteration3D, 0) is called. | |
| // All six pushed general-purpose registers are restored before returning. | |
| // NOTE(review): xmm7 is expected to survive the two indirect formula calls - confirm | |
| // that every formula routine leaves xmm7 untouched. | |
| procedure doInterpolHybridSSE2(PIteration3D: TPIteration3D); // new ext version | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = edi-32 y = edi-24 .. Rold = edi - 48, Rstop = edi - 40, (i = edi + 212 = btmp = esi - 44) | |
| // Reserve 72 bytes of scratch; a 16-byte aligned 32-byte buffer is carved out of it | |
| // and its address is kept at [esp]. | |
| add esp, -72 | |
| mov edi, eax //was: Rold = esp, Rstop = esp + 8, aligned16: esp + 16, X1 = a16 X2 = a16+8.. Y1 = a16+32 .. | |
| lea esi, eax + 256 | |
| mov eax, esp | |
| add eax, 35 | |
| and eax, $FFFFFFF0 | |
| mov [esp], eax // aligned 16 Ybuf aligned16: esp, X1 = a16.. = Y1 = (aligned) | |
| cvtps2pd xmm7, [edi + 76] //nHybrid[0] +76 weights in double for s1,s2 (lo,hi part) | |
| // Copy the three doubles at [edi] (C) into the working vector at [edi-32]; | |
| // the movsd load zeroes the upper half of xmm1, so the 4th component is stored as 0. | |
| movupd xmm0, [edi] | |
| movsd xmm1, [edi + 16] | |
| movupd [edi - 32], xmm0 //xyz=C | |
| movupd [edi - 16], xmm1 | |
| // Select the additive constant J: the stored Julia value or the start point C. | |
| cmp dword [esi - 104], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm2, [esi + 64] | |
| movsd xmm3, [esi + 80] | |
| movupd [edi + 24], xmm2 //J=Ju | |
| movsd [edi + 40], xmm3 | |
| jmp @skipIfJulia | |
| @sjup: | |
| movupd [edi + 24], xmm0 //J=C | |
| movsd [edi + 40], xmm1 | |
| @skipIfJulia: | |
| // Initial Rout = x*x + y*y + z*z of the start point; RStop is widened to double. | |
| mulpd xmm0, xmm0 | |
| mulsd xmm1, xmm1 | |
| CVTSS2SD xmm5, [edi + 72] //RStop in double | |
| addsd xmm1, xmm0 | |
| unpckhpd xmm0, xmm0 | |
| movsd [edi - 40], xmm5 | |
| addsd xmm1, xmm0 | |
| xor ebx, ebx | |
| movsd [esi - 64], xmm1 //OTrap=Rout | |
| movsd [edi + 56], xmm1 //Rout | |
| mov [esi - 48], ebx //bFirstIt := 0; +208 | |
| mov [edi + 64], ebx //ItresultI :=0 +64 | |
| @Repeat: | |
| // Save the start vector in the aligned buffer, then run formula 0 on the working vector. | |
| movsd xmm2, [edi + 56] | |
| mov ebx, [edi + 100] //fHPVar[0] +100 | |
| mov eax, [esp] | |
| mov [edi + 48], ebx //PVars: +48 | |
| movsd [edi - 48], xmm2 //Rold := Rout | |
| movupd xmm0, [edi - 32] //Y:=xyz | |
| movupd xmm1, [edi - 16] | |
| movapd [eax], xmm0 | |
| movapd [eax + 16], xmm1 | |
| lea eax, edi - 32 // x | |
| lea edx, edi - 24 // y | |
| lea ecx, edi - 16 // z | |
| lea ebx, edi - 8 // w | |
| push ebx | |
| push edi | |
| call [edi + 124] //fHybrid[0] of ThybridIteration2 | |
| // Swap: the buffer now keeps the result of formula 0, the working vector gets the | |
| // saved start vector back, and formula 1 is run on it. | |
| mov eax, [esp] | |
| movupd xmm0, [edi - 32] // mCopyVec4(@x1, @x); | |
| movupd xmm1, [edi - 16] // mCopyVec4(@x, @Y1); | |
| movapd xmm2, [eax] | |
| movapd xmm3, [eax + 16] | |
| movapd [eax], xmm0 | |
| movapd [eax + 16], xmm1 | |
| movupd [edi - 32], xmm2 //xyz=Y1 | |
| movupd [edi - 16], xmm3 | |
| mov ebx, [edi + 104] //fHPVar[1] | |
| mov [edi + 48], ebx //PVars: +48 | |
| lea eax, edi - 32 // x | |
| lea edx, edi - 24 // y | |
| lea ecx, edi - 16 // z | |
| lea ebx, edi - 8 // w | |
| push ebx | |
| push edi | |
| call [edi + 128] //fHybrid[1] of ThybridIteration2 | |
| // Blend: XX = len(result0)*s1 + len(result1)*s2 is the target length; the weighted | |
| // sum of both result vectors is rescaled so that its x,y,z length equals XX. | |
| mov eax, [esp] | |
| movupd xmm0, [edi - 32] //x,y was: y1 | |
| movapd xmm2, [eax] //x[0,1] | |
| movupd xmm1, [edi - 16] //z,w | |
| movapd xmm3, [eax + 16] //x[2,3] | |
| movapd xmm5, xmm0 //x,y | |
| movapd xmm6, xmm2 //x[0,1] | |
| mulpd xmm0, xmm0 //x²,y² | |
| mulpd xmm2, xmm2 //x[0]²,x[1]² | |
| mulsd xmm1, xmm1 //z²,w | |
| mulsd xmm3, xmm3 //x[2]² | |
| addsd xmm1, xmm0 //z²+x² | |
| addsd xmm3, xmm2 //x[2]²+x[0]² | |
| unpckhpd xmm0, xmm0 //y² | |
| unpckhpd xmm2, xmm2 //x[1]² | |
| addsd xmm1, xmm0 //x²+y²+z² | |
| addsd xmm3, xmm2 //x[0]²+x[1]²+x[2]² | |
| unpcklpd xmm3, xmm1 //x[0]²+x[1]²+x[2]²,x²+y²+z² | |
| sqrtpd xmm0, xmm3 //xx,yy | |
| mulpd xmm0, xmm7 //xx*s1,yy*s2 | |
| pshufd xmm2, xmm0, $4E | |
| addsd xmm0, xmm2 //XX = xx*s1+yy*s2 | |
| pshufd xmm3, xmm7, $4E //wy | |
| movsd xmm2, xmm7 //wx | |
| unpcklpd xmm3, xmm3 //s2,s2 | |
| unpcklpd xmm2, xmm2 //s1,s1 | |
| movupd xmm1, [edi - 16] //z,w | |
| mulpd xmm5, xmm3 //x,y *s2 | |
| mulpd xmm6, xmm2 //x[0,1] *s1 | |
| mulpd xmm3, xmm1 //z,w *s2 | |
| mulpd xmm2, [eax + 16] //x[2,3] *s1 | |
| addpd xmm5, xmm6 //x,y | |
| addpd xmm3, xmm2 //z,w | |
| movapd xmm4, xmm5 //x,y | |
| movsd xmm2, xmm3 //z | |
| mulpd xmm4, xmm4 //x²,y² | |
| mulsd xmm2, xmm2 //z² 4D: mulpd | |
| addsd xmm2, xmm4 //z²+x² 4D: addpd ... | |
| unpckhpd xmm4, xmm4 //y² | |
| addsd xmm4, xmm2 //x²+y²+z² | |
| addsd xmm4, d1em40 | |
| sqrtsd xmm4, xmm4 | |
| movsd xmm2, xmm0 //XX | |
| divsd xmm2, xmm4 //YY := XX / Sqrt(x * x + y * y + z * z + 1e-40); | |
| unpcklpd xmm2, xmm2 //YY,YY | |
| mulpd xmm5, xmm2 | |
| // mulsd scales only the low lane (z); the w lane of xmm3 is stored unscaled. | |
| mulsd xmm3, xmm2 | |
| movupd [edi - 32], xmm5 //x,y | |
| movupd [edi - 16], xmm3 //z,w | |
| mulsd xmm0, xmm0 | |
| movsd [edi + 56], xmm0 //Rout := XX * XX; | |
| movsd xmm1, xmm0 | |
| inc dword [edi + 64] //Inc(ItResultI) | |
| minsd xmm0, [esi - 64] | |
| movsd [esi - 64], xmm0 //OTrap := Min(Rout, OTrap); | |
| // Stop when ItResultI >= maxIt (signed compare); otherwise repeat while Rout < RStop. | |
| mov eax, [edi + 64] | |
| cmp eax, [edi + 68] //maxIt: +68 | |
| jnl @out | |
| comisd xmm1, [edi - 40] //RStop | |
| jc @Repeat | |
| @out: | |
| cmp byte [esi - 108], 0 //CalcSIT: +148 | |
| jz @NoCalcSITout | |
| mov eax, edi | |
| xor edx, edx | |
| call CalcSmoothIterations //(PIt3D: TPIteration3D; n: Integer); | |
| @NoCalcSITout: | |
| // Release the scratch area and restore the saved registers in reverse push order. | |
| add esp, 72 | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // doInterpolHybridDESSE2: distance-estimate variant of the two-formula blending loop. | |
| // On entry eax = PIteration3D. Every step runs fHybrid[0] and fHybrid[1] from the same | |
| // start vector and blends both results with the weights read from [PIteration3D + 76]. | |
| // The 4th component w at [edi-8] is initialised from DEoption before the loop and is | |
| // used by the result formulas after the loop. | |
| // Result: the value left in st(0) after the @out section, selected by (DEoption and 7): | |
| //   4    -> Abs(z) * Ln(Abs(z)) / w | |
| //   7    -> Sqrt(Rout / RStop) * Power((PVar - 16)^, -ItResultI) | |
| //   else -> Sqrt(Rout) / Abs(w) | |
| // If the CalcSIT byte is non-zero, CalcSmoothIterations(PIteration3D, 0) is called | |
| // while the result stays on the x87 stack. All pushed registers are restored on exit. | |
| // NOTE(review): xmm7 is expected to survive the two indirect formula calls - confirm. | |
| function doInterpolHybridDESSE2(PIteration3D: TPIteration3D): Double; // new ext version | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = edi-32 y = edi-24 .. Rold = edi - 48, Rstop = edi - 40, i = edi + 212 = btmp = esi - 44 | |
| // Reserve 72 bytes of scratch; the address of a 16-byte aligned buffer inside it is kept at [esp]. | |
| add esp, -72 | |
| mov edi, eax //was: Rold = esp, Rstop = esp + 8, aligned16: esp + 16, X1 = a16 X2 = a16+8.. Y1 = a16+32 .. | |
| lea esi, eax + 256 | |
| mov eax, esp | |
| add eax, 35 | |
| and eax, $FFFFFFF0 | |
| mov [esp], eax // aligned 16 Ybuf aligned16: esp, X1 = a16.. = Y1 = (aligned) | |
| cvtps2pd xmm7, [edi + 76] //nHybrid[0] +76 weights in double for s1,s2 (lo,hi part) | |
| // Working vector := C (three doubles at [edi]); the 4th lane is written as 0 here | |
| // and overwritten with the initial w further below. | |
| movupd xmm0, [edi] | |
| movsd xmm1, [edi + 16] | |
| movupd [edi - 32], xmm0 //xyz=C | |
| movupd [edi - 16], xmm1 | |
| cmp dword [esi - 104], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm2, [esi + 64] | |
| movsd xmm3, [esi + 80] | |
| movupd [edi + 24], xmm2 //J=Ju | |
| movsd [edi + 40], xmm3 | |
| jmp @skipIfJulia | |
| @sjup: | |
| movupd [edi + 24], xmm0 //J=C | |
| movsd [edi + 40], xmm1 | |
| @skipIfJulia: | |
| // Initial Rout = x*x + y*y + z*z; RStop widened to double. | |
| mulpd xmm0, xmm0 | |
| mulsd xmm1, xmm1 | |
| CVTSS2SD xmm5, [edi + 72] //RStop in double | |
| addsd xmm1, xmm0 | |
| unpckhpd xmm0, xmm0 | |
| movsd [edi - 40], xmm5 | |
| addsd xmm1, xmm0 | |
| xor ebx, ebx | |
| movsd [esi - 64], xmm1 //OTrap=Rout | |
| movsd [edi + 56], xmm1 //Rout | |
| mov [esi - 48], ebx //bFirstIt := 0; +208 | |
| mov [edi + 64], ebx //ItresultI :=0 +64 | |
| // Initial w: Rout when (DEoption and $18) = 16, otherwise 1. | |
| mov eax, [esi - 96] //DEoption +160 | |
| and eax, $18 | |
| sub eax, 16 | |
| jnz @UU1 | |
| fld qword [edi + 56] | |
| jmp @UU2 | |
| @UU1: | |
| fld1 | |
| @UU2: | |
| fstp qword [edi - 8] // if (DEoption and $18) = 16 then w := Rout else w := 1; | |
| @Repeat: | |
| // Save the start vector in the aligned buffer, then run formula 0 on the working vector. | |
| movsd xmm2, [edi + 56] | |
| mov ebx, [edi + 100] //fHPVar[0] +100 | |
| mov eax, [esp] | |
| mov [edi + 48], ebx //PVars: +48 | |
| movsd [edi - 48], xmm2 //Rold := Rout | |
| movupd xmm0, [edi - 32] //Y:=xyz | |
| movupd xmm1, [edi - 16] | |
| movapd [eax], xmm0 | |
| movapd [eax + 16], xmm1 | |
| lea eax, edi - 32 // x | |
| lea edx, edi - 24 // y | |
| lea ecx, edi - 16 // z | |
| lea ebx, edi - 8 // w | |
| push ebx | |
| push edi | |
| call [edi + 124] //fHybrid[0] of ThybridIteration2 | |
| // Swap buffer and working vector, then run formula 1 on the restored start vector. | |
| mov eax, [esp] | |
| movupd xmm0, [edi - 32] // mCopyVec4(@x1, @x); | |
| movupd xmm1, [edi - 16] // mCopyVec4(@x, @Y1); | |
| movapd xmm2, [eax] | |
| movapd xmm3, [eax + 16] | |
| movapd [eax], xmm0 | |
| movapd [eax + 16], xmm1 | |
| movupd [edi - 32], xmm2 //xyz=Y1 | |
| movupd [edi - 16], xmm3 | |
| mov ebx, [edi + 104] //fHPVar[1] | |
| mov [edi + 48], ebx //PVars: +48 | |
| lea eax, edi - 32 // x | |
| lea edx, edi - 24 // y | |
| lea ecx, edi - 16 // z | |
| lea ebx, edi - 8 // w | |
| push ebx | |
| push edi | |
| call [edi + 128] //fHybrid[1] of ThybridIteration2 | |
| // Blend both results: target length XX = len0*s1 + len1*s2, weighted vector sum | |
| // rescaled to that length (x,y,z only; the w lane is summed but not rescaled). | |
| mov eax, [esp] | |
| movupd xmm0, [edi - 32] //x,y was: y1 | |
| movapd xmm2, [eax] //x[0,1] | |
| movupd xmm1, [edi - 16] //z,w | |
| movapd xmm3, [eax + 16] //x[2,3] | |
| movapd xmm5, xmm0 //x,y | |
| movapd xmm6, xmm2 //x[0,1] | |
| mulpd xmm0, xmm0 //x²,y² | |
| mulpd xmm2, xmm2 //x[0]²,x[1]² | |
| mulsd xmm1, xmm1 //z²,w | |
| mulsd xmm3, xmm3 //x[2]² | |
| addsd xmm1, xmm0 //z²+x² | |
| addsd xmm3, xmm2 //x[2]²+x[0]² | |
| unpckhpd xmm0, xmm0 //y² | |
| unpckhpd xmm2, xmm2 //x[1]² | |
| addsd xmm1, xmm0 //x²+y²+z² | |
| addsd xmm3, xmm2 //x[0]²+x[1]²+x[2]² | |
| unpcklpd xmm3, xmm1 //x[0]²+x[1]²+x[2]²,x²+y²+z² | |
| sqrtpd xmm0, xmm3 //xx,yy | |
| mulpd xmm0, xmm7 //xx*s1,yy*s2 | |
| pshufd xmm2, xmm0, $4E | |
| addsd xmm0, xmm2 //XX = xx*s1+yy*s2 | |
| pshufd xmm3, xmm7, $4E //wy | |
| movsd xmm2, xmm7 //wx | |
| unpcklpd xmm3, xmm3 //s2,s2 | |
| unpcklpd xmm2, xmm2 //s1,s1 | |
| movupd xmm1, [edi - 16] //z,w | |
| mulpd xmm5, xmm3 //x,y *s2 | |
| mulpd xmm6, xmm2 //x[0,1] *s1 | |
| mulpd xmm3, xmm1 //z,w *s2 | |
| mulpd xmm2, [eax + 16] //x[2,3] *s1 | |
| addpd xmm5, xmm6 //x,y | |
| addpd xmm3, xmm2 //z,w | |
| movapd xmm4, xmm5 //x,y | |
| movsd xmm2, xmm3 //z | |
| mulpd xmm4, xmm4 //x²,y² | |
| mulsd xmm2, xmm2 //z² 4D: mulpd | |
| addsd xmm2, xmm4 //z²+x² 4D: addpd ... | |
| unpckhpd xmm4, xmm4 //y² | |
| addsd xmm4, xmm2 //x²+y²+z² | |
| addsd xmm4, d1em40 | |
| sqrtsd xmm4, xmm4 | |
| movsd xmm2, xmm0 //XX | |
| divsd xmm2, xmm4 //YY := XX / Sqrt(x * x + y * y + z * z + 1e-40); | |
| unpcklpd xmm2, xmm2 //YY,YY | |
| mulpd xmm5, xmm2 | |
| mulsd xmm3, xmm2 | |
| movupd [edi - 32], xmm5 //x,y | |
| movupd [edi - 16], xmm3 //z,w | |
| mulsd xmm0, xmm0 | |
| movsd [edi + 56], xmm0 //Rout := XX * XX; | |
| movsd xmm1, xmm0 | |
| inc dword [edi + 64] //Inc(ItResultI) | |
| minsd xmm0, [esi - 64] | |
| movsd [esi - 64], xmm0 //OTrap := Min(Rout, OTrap); | |
| // Stop when ItResultI >= maxIt (signed); otherwise repeat while Rout < RStop. | |
| mov eax, [edi + 64] | |
| cmp eax, [edi + 68] //maxIt: +68 | |
| jnl @out | |
| comisd xmm1, [edi - 40] //RStop | |
| jc @Repeat | |
| @out: | |
| // Select the distance-estimate formula from the low three bits of DEoption. | |
| mov eax, [esi - 96] //DEoption +160 | |
| and eax, 7 | |
| sub eax, 4 | |
| jnz @UU3 //Result := Abs(z) * Ln(Abs(z)) / w; | |
| fld qword [edi - 16] | |
| fabs | |
| fldln2 | |
| fld st(1) | |
| fyl2x | |
| fmulp | |
| fdiv qword [edi - 8] //Result | |
| jmp @UU6 | |
| @UU3: | |
| sub eax, 3 // / intPower faster? | |
| jnz @UU4 //Result := Sqrt(Rout/RStop) * Power(PDouble(Integer(PVar) - 16)^, -ItResultI); | |
| mov eax, [edi + 48] | |
| fild dword [edi + 64] //ItResultI | |
| fchs //-ItresultI | |
| fld qword [eax - 16] //(Pvar-16)^ (= scale or something) | |
| // Inline Power(base, expo): t = expo * ln(base) * log2(e), result = 2^frac(t) scaled by 2^int(t). | |
| fldln2 //power function base,expo -> st, st(1) | |
| fxch | |
| fyl2x | |
| fxch | |
| fmulp | |
| fldl2e | |
| fmulp | |
| fld st(0) | |
| frndint | |
| fsub st(1), st(0) | |
| fxch | |
| f2xm1 | |
| fld1 | |
| faddp | |
| fscale | |
| fstp st(1) //end of power function | |
| fld qword [edi + 56] | |
| fdiv dword [edi + 72] //rout/rstop,pow | |
| fsqrt | |
| fmulp | |
| jmp @UU6 | |
| @UU4: // else Result := Sqrt(Rout) / Abs(w); | |
| fld qword [edi + 56] | |
| fsqrt | |
| fld qword [edi - 8] | |
| fabs | |
| fdivp | |
| @UU6: | |
| // The result stays in st(0) across the optional smooth-iteration call. | |
| cmp byte [esi - 108], 0 //CalcSIT: +148 | |
| jz @NoCalcSITout | |
| mov eax, edi | |
| xor edx, edx | |
| call CalcSmoothIterations //(PIt3D: TPIteration3D; n: Integer); | |
| @NoCalcSITout: | |
| add esp, 72 | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // doHybridIFS3D: iteration loop for formulas that report a distance estimate per step. | |
| // On entry eax = PIteration3D; esi is set to PIteration3D + 88 and used as base pointer. | |
| // The working vector at [eax-32] is initialised from the three doubles at [eax] (C) and | |
| // J is set to either the stored Julia value or C. | |
| // Each pass calls the current formula through the fHybrid table, divides the value the | |
| // formula left at [esi-32] by the scale at [esi+112] and remembers the smallest quotient | |
| // (stored in the OTrap slot at [esi+104]) together with the iteration number it occurred at. | |
| // The loop ends at maxIt, or early when the bIsInsideRender copy is not negative and the | |
| // new minimum falls below the double at [esi-128]. | |
| // Result: the smallest quotient, returned in st(0). ItResultI and SmoothItD are set to the | |
| // iteration number of that minimum; the OTrap slot is overwritten with Dfree2 on exit. | |
| // All pushed registers are restored. | |
| // NOTE(review): unlike doHybridIFS3DnoVecIni this routine does not clear bTmp ([esi+124]) | |
| // before the loop and tests the bIsInsideRender copy with js instead of jne - confirm | |
| // whether these differences are intended. | |
| function doHybridIFS3D(PIteration3D: TPIteration3D): Double; | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = esi-128 y = esi-120 .. btmp = esi+116 (eax+212) | |
| lea esi, eax + 88 | |
| movupd xmm0, [eax] | |
| movsd xmm1, [eax + 16] | |
| movupd [eax - 32], xmm0 //X=Cx | |
| movsd [eax - 16], xmm1 | |
| lea edx, esi + 128 | |
| // When DoJulia is set, xmm0/xmm1 are replaced by the Julia constant before being stored as J. | |
| cmp dword [esi + 64], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm0, [edx + 104] //J=Ju +320 -88=+232 -128=104 | |
| movsd xmm1, [edx + 120] | |
| @sjup: | |
| movupd [eax + 24], xmm0 //J=C | |
| movsd [eax + 40], xmm1 | |
| @skipIfJulia: | |
| xor ebx, ebx //n:=0 | |
| mov eax, [esi + 296] | |
| mov [esi + 120], ebx //bFirstIt := 0; +208 | |
| mov [esi - 24], ebx //ItresultI:=0 +64 | |
| mov [esi - 36], eax //bIsInsideRender tmp in SmothIts | |
| movzx ebx, word [esi + 102] | |
| // x87 stack after the three loads: 1, d65535, 0 - stored to VaryScale, OTrap and Dfree1. | |
| fldz | |
| fld d65535 //minDE ini | |
| fld1 | |
| fstp qword [esi + 112] //VaryScale: //+200 absScale, must be changed in formulas | |
| fstp qword [esi + TIteration3Dext.OTrap - 144] // 104 OTrap: Double; //+192 min of AbsScale | |
| fstp qword [edx + TIteration3Dext.Dfree1 - 144 - 128] //+248 +56 | |
| // edi = parameter pointer of the current formula, ecx = its remaining repeat count | |
| // (bit 31 of nHybrid[n] is masked off and tested separately after the call). | |
| mov edi, [esi + ebx * 4 + 12] //fHPVar[0] +100 | |
| mov ecx, [esi + ebx * 4 - 12] //i:=nHybrid[0] +76 | |
| and ecx, $7FFFFFFF | |
| @Repeat: | |
| cmp ecx, 0 | |
| jnle @up2 | |
| @While: | |
| // Advance to the next formula slot, wrapping to iRepeatFrom past wEndTo; slots whose | |
| // masked count is zero are skipped. | |
| inc ebx | |
| cmp bx, word [esi + 62] //5 wEndTo: Word; //+150 | |
| jle @up3 | |
| movzx ebx, word [esi + 100] //n := iRepeatFrom //+188 | |
| @up3: | |
| mov ecx, [esi + ebx * 4 - 12] //i := nHybrid[n]; +76 | |
| and ecx, $7FFFFFFF | |
| jle @While | |
| mov edi, [esi + ebx * 4 + 12] //fHPVar:array[0..5] of Pointer; //+100 | |
| @up2: | |
| call [esi + ebx * 4 + 36] //fHybrid[0..5] of ThybridIteration2; //+124 | |
| dec ecx //Dec(i) | |
| // Operand size made explicit: nHybrid[n] is read as a dword everywhere else in this | |
| // routine and the sign test needs bit 31 of the full dword. | |
| cmp dword [esi + ebx * 4 - 12], 0 | |
| jl @Repeat | |
| movsd xmm0, [esi - 32] //DEout relative; Rout: Double; //+56 | |
| inc dword [esi - 24] //Inc(ItResultI) //+64 | |
| divsd xmm0, [esi + 112] //abs Scale VaryScale: Double; //+200 | |
| mov eax, [esi - 24] | |
| ucomisd xmm0, [esi + 104] // memorize the smallest DE for itresult | |
| jnc @skip | |
| // New minimum: remember the iteration number, store the value and copy Dfree1 to Dfree2. | |
| lea edx, esi + 104 | |
| mov [esi + 124], eax // bTmp: Integer; //+212 | |
| fld qword [edx + TIteration3Dext.Dfree1 - 144-104] //+128 | |
| movsd [edx], xmm0 //result DE output | |
| fstp qword [edx + TIteration3Dext.Dfree2 - 144-104] //+136 | |
| cmp dword [esi - 36], 0 //was: +384 -88=296 bIsInsideRender | |
| js @skip //if outside, compare if DE is lower than minDE | |
| ucomisd xmm0, [esi - 128] //compare with RstopD, that contains the DEstop condition. Stop if nearer. | |
| jc @out | |
| @skip: | |
| cmp eax, [esi - 20] //maxIt: +68 | |
| jl @Repeat | |
| @out: | |
| // [esi-36] held the bIsInsideRender copy during the loop and now receives SmoothItD. | |
| fild dword [esi + 124] | |
| mov eax, [esi + 124] //it on minDE | |
| fstp dword [esi - 36] //SmoothItD: Single; //+52 | |
| mov [esi - 24], eax //ItResultI | |
| add esi, 104 | |
| // Leave the minimum in st(0) as the function result and replace OTrap with Dfree2. | |
| fld qword [esi] //MinDE in OTrap | |
| fld qword [esi + 32] //Dfree2 | |
| fstp qword [esi] //OTrap | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // doHybridIFS3DnoVecIni: same loop as doHybridIFS3D but without initialising the working | |
| // vector or J, so it continues from whatever vector is already stored in the record. | |
| // On entry eax = PIteration3D; esi is set to PIteration3D + 88 and used as base pointer. | |
| // Each pass calls the current formula through the fHybrid table, divides the value the | |
| // formula left at [esi-32] by the scale at [esi+112] and remembers the smallest quotient | |
| // (in the OTrap slot at [esi+104]) together with the iteration number it occurred at. | |
| // The loop ends at maxIt, or early when the bIsInsideRender copy is zero and the new | |
| // minimum falls below the double at [esi-128]. | |
| // Result: the smallest quotient, returned in st(0). ItResultI and SmoothItD are set to the | |
| // iteration number of that minimum; the OTrap slot is overwritten with Dfree2 on exit. | |
| // All pushed registers are restored. | |
| function doHybridIFS3DnoVecIni(PIteration3D: TPIteration3D): Double; //to use behind common fractals, use the new vec for it | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = esi-128 y = esi-120 .. btmp = esi+116 (eax+212) | |
| lea esi, eax + 88 // | |
| xor ebx, ebx //n:=0 | |
| mov eax, [esi + 296] | |
| mov [esi + 120], ebx //bFirstIt := 0; +208 | |
| // bTmp (iteration number of the minimum) is cleared before the loop. | |
| mov [esi + 124], ebx | |
| mov [esi - 24], ebx //ItresultI:=0 +64 | |
| mov [esi - 36], eax //bIsInsideRender tmp in SmothIts | |
| movzx ebx, word [esi + 102] //n := iStartFrom | |
| // x87 stack after the three loads: 1, d65535, 0 - stored to VaryScale, OTrap and Dfree1. | |
| fldz | |
| fld d65535 //minDE ini | |
| fld1 | |
| fstp qword [esi + 112] //VaryScale: //+200 absScale, must be changed in formulas | |
| fstp qword [esi + TIteration3Dext.OTrap - 144] // 104 OTrap: Double; //+192 min of AbsScale | |
| fstp qword [esi + TIteration3Dext.Dfree1 - 144] //+248 +56 | |
| // edi = parameter pointer of the current formula, ecx = its remaining repeat count | |
| // (bit 31 of nHybrid[n] is masked off and tested separately after the call). | |
| mov edi, [esi + ebx * 4 + 12] //fHPVar[0] +100 | |
| mov ecx, [esi + ebx * 4 - 12] //i:=nHybrid[n] +76 | |
| and ecx, $7FFFFFFF | |
| @Repeat: | |
| cmp ecx, 0 | |
| jnle @up2 | |
| @While: | |
| // Advance to the next formula slot, wrapping to iRepeatFrom past wEndTo; slots whose | |
| // masked count is zero are skipped. | |
| inc ebx | |
| cmp bx, word [esi + 62] //5 wEndTo: Word; //+150 | |
| jle @up3 | |
| movzx ebx, word [esi + 100] //n := iRepeatFrom //+188 | |
| @up3: | |
| mov ecx, [esi + ebx * 4 - 12] //i := nHybrid[n]; +76 | |
| and ecx, $7FFFFFFF | |
| jle @While | |
| mov edi, [esi + ebx * 4 + 12] //fHPVar:array[0..5] of Pointer; //+100 | |
| @up2: | |
| call [esi + ebx * 4 + 36] //fHybrid[0..5] of ThybridIteration2; //+124 | |
| dec ecx //Dec(i) | |
| // Operand size made explicit: nHybrid[n] is read as a dword everywhere else in this | |
| // routine and the sign test needs bit 31 of the full dword. | |
| cmp dword [esi + ebx * 4 - 12], 0 | |
| jl @Repeat | |
| movsd xmm0, [esi - 32] //DEout relative; Rout: Double; //+56 | |
| inc dword [esi - 24] //Inc(ItResultI) //+64 | |
| divsd xmm0, [esi + 112] //abs Scale VaryScale: Double; //+200 | |
| mov eax, [esi - 24] | |
| ucomisd xmm0, [esi + 104] // memorize the smallest DE for itresult | |
| jnc @skip | |
| // New minimum: remember the iteration number, store the value and copy Dfree1 to Dfree2. | |
| lea edx, esi + 104 | |
| mov [esi + 124], eax // bTmp: Integer; //+212 | |
| fld qword [edx + TIteration3Dext.Dfree1 - 144-104] //+128 otrap color option | |
| movsd [edx], xmm0 //result DE output | |
| fstp qword [edx + TIteration3Dext.Dfree2 - 144-104] //+136 | |
| cmp dword [esi - 36], 0 //was: +384 -88=296 bIsInsideRender | |
| jne @skip //if outside, compare if DE is lower than minDE | |
| ucomisd xmm0, [esi - 128] //compare with RstopD, that contains the DEstop condition. Stop if nearer. | |
| jc @out | |
| @skip: | |
| cmp eax, [esi - 20] //maxIt: +68 | |
| jl @Repeat | |
| @out: | |
| // [esi-36] held the bIsInsideRender copy during the loop and now receives SmoothItD. | |
| fild dword [esi + 124] | |
| mov eax, [esi + 124] //it on minDE | |
| fstp dword [esi - 36] //SmoothItD: Single; //+52 | |
| mov [esi - 24], eax //ItResultI | |
| add esi, 104 | |
| // Leave the minimum in st(0) as the function result and replace OTrap with Dfree2. | |
| fld qword [esi] //MinDE in OTrap | |
| fld qword [esi + 32] //Dfree2 | |
| fstp qword [esi] //OTrap | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // CalcSmoothIterations: writes the fractional ("smooth") iteration count to SmoothItD. | |
| // Register convention: eax = PIt3D, edx = n (index into fHln). | |
| // Three cases, decided by integer compares on the high dword of the doubles: | |
| //   high dword of Rout not greater than $3FF00000 : SmoothItD := ItResultI | |
| //   high dword of Rold below $3FF00000 (unsigned) : | |
| //       SmoothItD := ItResultI + LNRStop - Ln(0.5 * Ln(Rout)) * fHln[n] | |
| //   otherwise, with d = Ln(0.5 * Ln(Rout)) : | |
| //       SmoothItD := ItResultI + (LNRStop - d) / (d - Ln(0.5 * Ln(Rold)) + d1em100) | |
| // eax is advanced by $34 on entry and every field offset is written relative to that; | |
| // eax is not restored. The first two cases leave through an explicit ret. | |
| // NOTE(review): s05 and d1em100 are constants defined elsewhere; the comments here | |
| // assume they are 0.5 and a tiny epsilon - confirm. | |
| procedure CalcSmoothIterations(PIt3D: TPIteration3D; n: Integer); | |
| asm | |
| add eax, $34 | |
| cmp dword [eax + TIteration3D.Rout + 4 - $34], $3FF00000 //Rout <= 1? [Rout+4] //+$3c cmp with $3FF0.. does not work always!!! | |
| jg @@1 | |
| fild dword [eax + TIteration3D.ItResultI - $34] //+$40 | |
| fstp dword [eax + TIteration3D.SmoothItD - $34] //+$34 | |
| ret | |
| @@1: | |
| fld qword [eax + TIteration3D.Rout - $34] //+$38 Rout | |
| cmp dword [eax + TIteration3Dext.Rold - 56 + 4 - $34], $3FF00000 //Rold <= 1? -$2c | |
| jnb @@2 | |
| // fldln2 / fxch / fyl2x computes the natural log of st(0): ln2 * log2(x). | |
| fldln2 | |
| fxch //Rout,ln | |
| fyl2x | |
| fmul s05 //ln(Rout)*0.5 | |
| fldln2 | |
| fxch | |
| fyl2x | |
| fmul dword [eax + edx * 4 + TIteration3D.fHln - $34] // PIt3D.fHln[n] +$00a4 | |
| fild dword [eax + TIteration3D.ItResultI - $34] //+$40 | |
| fadd dword [eax + TIteration3D.LNRStop - $34] //+$009c | |
| // st(1) := st(0) - st(1), pop: (ItResultI + LNRStop) - scaled log term. | |
| fsubrp | |
| fstp dword [eax + TIteration3D.SmoothItD - $34] //+$34 | |
| ret | |
| @@2: | |
| fldln2 | |
| fxch | |
| fyl2x //ln(Rout) | |
| fmul s05 | |
| fldln2 | |
| fxch | |
| fyl2x //d | |
| fldln2 //ln2,d | |
| fld qword [eax + TIteration3Dext.Rold - 56 - $34] //Rold,ln2,d | |
| fyl2x | |
| fmul s05 | |
| fldln2 | |
| fxch | |
| fyl2x | |
| fsubr st, st(1) //d - Ln(0.5 * Ln(PIt3D.Rold)), d | |
| fld dword [eax + TIteration3D.LNRStop - $34] //+$009c | |
| fsubrp st(2), st //d - Ln(0.5 * Ln(PIt3D.Rold)), PIt3D.LNRStop - d | |
| // The small constant is added to the divisor before the division. | |
| fadd d1em100 //test | |
| fdivp //div0 sometimes | |
| fiadd dword [eax + TIteration3D.ItResultI - $34] //+$40 | |
| fstp dword [eax + TIteration3D.SmoothItD - $34] //+$34 | |
| end; | |
| // doHybrid4DSSE2: hybrid iteration loop working on all four vector components. | |
| // On entry eax = PIteration3D (kept in edi); esi = PIteration3D + 256. | |
| // The start vector at [edi-32] is produced by Rotate4Dex; J is set to the stored Julia | |
| // value or to that start vector, with its 4th component kept at [edi-56]. | |
| // Each pass selects the current formula slot n (ebx) with its repeat counter at [esi-44], | |
| // calls fHybrid[n](x, y, z, w, PIteration3D), then recomputes Rout = x*x+y*y+z*z+w*w and | |
| // OTrap = Min(Rout, OTrap). When nHybrid[n] is negative the Rout/maxIt section is skipped. | |
| // The loop ends when ItResultI reaches maxIt or Rout is no longer below RStop. | |
| // If the CalcSIT byte is non-zero, CalcSmoothIterations(PIteration3D, n) is called. | |
| // All pushed registers are restored on exit. | |
| // NOTE(review): xmm7 (Rout) is stored as Rold at @Repeat also when arriving from the | |
| // skip branch right after a formula call, so formulas are expected to preserve xmm7 - confirm. | |
| procedure doHybrid4DSSE2(PIteration3D: TPIteration3D); //new ext version | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = edi-32 y = edi-24 .. Rold = edi - 48, Rstop = edi - 40, i = edi + 212 = btmp = esi - 44 | |
| mov edi, eax | |
| lea esi, eax + 256 | |
| lea edx, edi -32 | |
| mov ecx, esi | |
| call Rotate4Dex //(@C1, @x, SMatrix4); C1=It3D=eax | |
| movupd xmm6, [edi - 32] | |
| movupd xmm7, [edi - 16] | |
| cmp dword [esi - 104], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm2, [esi + 64] | |
| movupd xmm3, [esi + 80] | |
| movupd [edi + 24], xmm2 //J=Ju | |
| movlpd [edi + 40], xmm3 | |
| movhpd [edi - 56], xmm3 | |
| jmp @skipIfJulia | |
| @sjup: | |
| movupd [edi + 24], xmm6 //J=C | |
| movlpd [edi + 40], xmm7 | |
| movhpd [edi - 56], xmm7 //J4 = edi - 56 | |
| @skipIfJulia: | |
| // Initial Rout = sum of the four squared components; RStop widened to double. | |
| mulpd xmm6, xmm6 | |
| mulpd xmm7, xmm7 | |
| CVTSS2SD xmm5, [edi + 72] //RStop in double | |
| addpd xmm7, xmm6 | |
| pshufd xmm6, xmm7, $4E | |
| movsd [edi - 40], xmm5 | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd [esi - 64], xmm7 //OTrap=Rout | |
| movsd [edi + 56], xmm7 //Rout | |
| xor ebx, ebx //n:=0 | |
| mov [esi - 48], ebx //bFirstIt := 0; +208 | |
| mov [edi + 64], ebx //ItresultI:=0 +64 | |
| movzx ebx, word [esi - 66] //n:=iStartFrom | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar[0] +100 | |
| mov [edi + 48], eax //PVars: +48 | |
| mov eax, [edi + ebx * 4 + 76] //i:=nHybrid[0] +76 | |
| and eax, $7FFFFFFF | |
| mov [esi - 44], eax //i(=It3D.btmp) | |
| @Repeat: | |
| movsd [edi - 48], xmm7 //Rold := Rout | |
| cmp dword [esi - 44], 0 | |
| jnle @up2 | |
| @While: | |
| // Advance to the next formula slot, wrapping to iRepeatFrom past wEndTo; slots whose | |
| // masked count is zero are skipped. | |
| inc ebx | |
| cmp bx, word [esi - 106] //5 wEndTo: Word; //+150 | |
| jle @up3 | |
| movzx ebx, word [esi - 68] //n := iRepeatFrom | |
| @up3: | |
| mov eax, [edi + ebx * 4 + 76] //i := nHybrid[n]; +76 | |
| and eax, $7FFFFFFF | |
| jle @While | |
| mov [esi - 44], eax | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar:array[0..5] of Pointer; | |
| mov [edi + 48], eax //PVars: +48 | |
| @up2: | |
| // Arguments: eax=@x, edx=@y, ecx=@z, stack = @w then PIteration3D. | |
| lea eax, edi - 8 //was: esp + 24 w | |
| push eax | |
| push edi | |
| lea edx, edi - 24 //was: esp + 16 y | |
| lea ecx, edi - 16 //was: esp + 24 z | |
| add eax, -24 // x | |
| call [edi + ebx * 4 + 124] //fHybrid[0..5] of ThybridIteration2; //+124 | |
| // Operand sizes made explicit: the counter and nHybrid[n] are accessed as dwords | |
| // everywhere else in this routine. | |
| dec dword [esi - 44] //Dec(i) write at addr... false dIFS?? | |
| cmp dword [edi + ebx * 4 + 76], 0 //nHybrid[fnr] | |
| jl @Repeat //SkipMaxItTest | |
| movupd xmm6, [edi - 32] | |
| movupd xmm7, [edi - 16] | |
| mulpd xmm6, xmm6 | |
| mulpd xmm7, xmm7 | |
| addpd xmm7, xmm6 | |
| pshufd xmm6, xmm7, $4E | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd xmm5, xmm7 | |
| minsd xmm5, qword [esi - 64] | |
| movsd [edi + 56], xmm7 //Rout | |
| movsd [esi - 64], xmm5 //OTrap | |
| inc dword [edi + 64] //Inc(ItResultI) | |
| // Stop when ItResultI >= maxIt (signed); otherwise repeat while Rout < RStop. | |
| mov eax, [edi + 64] | |
| cmp eax, [edi + 68] //maxIt: +68 | |
| jnl @out | |
| comisd xmm7, [edi - 40] //RStop | |
| jc @Repeat | |
| @out: | |
| cmp byte [esi - 108], 0 //CalcSIT: +148 | |
| jz @NoCalcSITout | |
| mov eax, edi | |
| mov edx, ebx | |
| call CalcSmoothIterations //(PIt3D: TPIteration3D; n: Integer); | |
| @NoCalcSITout: | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // doHybridSSE2: hybrid iteration loop on the three components x, y, z. | |
| // On entry eax = PIteration3D (kept in edi); esi = PIteration3D + 256. | |
| // The working vector at [edi-32] is initialised from the three doubles at [edi] (C), | |
| // with the 4th component written as 0; J is the stored Julia value or C. | |
| // Each pass selects the current formula slot n (ebx) with its repeat counter at [esi-44], | |
| // calls fHybrid[n](x, y, z, w, PIteration3D), then recomputes Rout = x*x+y*y+z*z and | |
| // OTrap = Min(Rout, OTrap). When nHybrid[n] is negative the Rout/maxIt section is skipped. | |
| // The loop ends when ItResultI reaches maxIt or Rout is no longer below RStop. | |
| // If the CalcSIT byte is non-zero, CalcSmoothIterations(PIteration3D, n) is called. | |
| // All pushed registers are restored on exit. | |
| // NOTE(review): xmm7 (Rout) is stored as Rold at @Repeat also when arriving from the | |
| // skip branch right after a formula call, so formulas are expected to preserve xmm7 - confirm. | |
| procedure doHybridSSE2(PIteration3D: TPIteration3D); //new ext version | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = edi-32 y = edi-24 .. Rold = edi - 48, Rstop = edi - 40, (i = edi + 212 = btmp = esi - 44) | |
| mov edi, eax // = [edi - 32] | |
| lea esi, eax + 256 | |
| movupd xmm6, [edi] //Iteration3D by calcMissed not aligned16?! | |
| movsd xmm7, [edi + 16] | |
| movupd [edi - 32], xmm6 //X=C | |
| movupd [edi - 16], xmm7 | |
| cmp dword [esi - 104], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm2, [esi + 64] | |
| movsd xmm3, [esi + 80] | |
| movupd [edi + 24], xmm2 //J=Ju | |
| movsd [edi + 40], xmm3 | |
| jmp @skipIfJulia | |
| @sjup: | |
| movupd [edi + 24], xmm6 //J=C | |
| movsd [edi + 40], xmm7 | |
| @skipIfJulia: | |
| // Initial Rout = x*x + y*y + z*z; RStop widened to double. | |
| mulpd xmm6, xmm6 | |
| mulsd xmm7, xmm7 | |
| CVTSS2SD xmm5, [edi + 72] //RStop in double | |
| addsd xmm7, xmm6 | |
| shufpd xmm6, xmm6, 1 | |
| movsd [edi - 40], xmm5 | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd [esi - 64], xmm7 //OTrap=Rout | |
| movsd [edi + 56], xmm7 //Rout | |
| xor ebx, ebx | |
| mov [esi - 48], ebx //bFirstIt := 0; +208 | |
| mov [edi + 64], ebx //ItresultI:=0 +64 | |
| movzx ebx, word [esi - 66] //n := iStartFrom | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar[0] +100 | |
| mov [edi + 48], eax //PVars: +48 | |
| mov eax, [edi + ebx * 4 + 76] //i:=nHybrid[0] +76 | |
| and eax, $7FFFFFFF | |
| mov [esi - 44], eax //btmp | |
| @Repeat: | |
| movsd [edi - 48], xmm7 //Rold := Rout | |
| cmp dword [esi - 44], 0 | |
| jnle @up2 | |
| @While: | |
| // Advance to the next formula slot, wrapping to iRepeatFrom past wEndTo; slots whose | |
| // masked count is zero are skipped. | |
| inc ebx | |
| cmp bx, word [esi - 106] //5 wEndTo: Word; //+150 | |
| jle @up3 | |
| movzx ebx, word [esi - 68] //n := iRepeatFrom | |
| @up3: | |
| mov eax, [edi + ebx * 4 + 76] //i := nHybrid[n]; +76 | |
| and eax, $7FFFFFFF | |
| jle @While | |
| mov [esi - 44], eax //was btmp, now own var | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar:array[0..5] of Pointer; | |
| mov [edi + 48], eax //PVars: +48 | |
| @up2: | |
| // Arguments: eax=@x, edx=@y, ecx=@z, stack = @w then PIteration3D. | |
| lea eax, edi - 8 // w | |
| push eax | |
| push edi | |
| lea edx, edi - 24 | |
| lea ecx, edi - 16 | |
| add eax, -24 | |
| call [edi + ebx * 4 + 124] //fHybrid[0..5] of ThybridIteration2; //+124 fp overflow: it3dex.z > 1eXXX ! | |
| // Operand sizes made explicit: the counter and nHybrid[n] are accessed as dwords | |
| // everywhere else in this routine. | |
| dec dword [esi - 44] //Dec(i) | |
| cmp dword [edi + ebx * 4 + 76], 0 | |
| jl @Repeat //SkipMaxItTest | |
| movupd xmm6, [edi - 32] | |
| movupd xmm7, [edi - 16] | |
| mulpd xmm6, xmm6 | |
| mulsd xmm7, xmm7 //4D: mulpd | |
| addsd xmm7, xmm6 //4D: addpd | |
| shufpd xmm6, xmm6, 1 //4D: pshufd xmm6, xmm7, $4E | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd xmm5, xmm7 | |
| minsd xmm5, qword [esi - 64] | |
| movsd [edi + 56], xmm7 //Rout | |
| movsd [esi - 64], xmm5 //OTrap | |
| inc dword [edi + 64] //Inc(ItResultI) | |
| // Stop when ItResultI >= maxIt (signed); otherwise repeat while Rout < RStop. | |
| mov eax, [edi + 64] | |
| cmp eax, [edi + 68] //maxIt: +68 | |
| jnl @out | |
| comisd xmm7, [edi - 40] //RStop | |
| jc @Repeat | |
| @out: | |
| cmp byte [esi - 108], 0 //CalcSIT: +148 | |
| jz @NoCalcSITout | |
| mov eax, edi | |
| mov edx, ebx | |
| call CalcSmoothIterations //(PIt3D: TPIteration3D; n: Integer); | |
| @NoCalcSITout: | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // doHybridDESSE2: hybrid iteration loop that also returns an analytic distance estimate. | |
| // On entry eax = PIteration3D (kept in edi); esi = PIteration3D + 256. | |
| // The working vector at [edi-32] is initialised from the three doubles at [edi] (C); | |
| // J is the stored Julia value or C. The 4th component w at [edi-8] is initialised from | |
| // (DEoption and $38): 16 -> Rout, 32 -> 0 (and Deriv1 := 1, Deriv2 := Deriv3 := 0), else 1. | |
| // Each pass selects the current formula slot n (ebx) with its repeat counter at [esi-44], | |
| // calls fHybrid[n](x, y, z, w, PIteration3D), then recomputes Rout = x*x+y*y+z*z and | |
| // OTrap = Min(Rout, OTrap). When nHybrid[n] is negative the Rout/maxIt section is skipped. | |
| // The loop ends when ItResultI reaches maxIt or Rout is no longer below RStop. | |
| // Result (left in st(0)): | |
| //   (DEoption and $38) = 32 -> Sqrt(Rout) * 0.5 * Ln(Rout) / Deriv1 | |
| //   (DEoption and 7) = 4    -> Abs(z) * Ln(Abs(z)) / w | |
| //   (DEoption and 7) = 7    -> Sqrt(Rout / RStop) * Power((PVar - 16)^, -ItResultI) | |
| //   else                    -> Sqrt(Rout) / Abs(w) | |
| // If the CalcSIT byte is non-zero, CalcSmoothIterations(PIteration3D, n) is called while | |
| // the result stays on the x87 stack. All pushed registers are restored on exit. | |
| // Fixes in this version: the mask at @out was the decimal literal 38 instead of $38; | |
| // three memory operands without a size now carry an explicit dword/qword. | |
| // NOTE(review): xmm7 (Rout) is stored as Rold at @Repeat also when arriving from the | |
| // skip branch right after a formula call, so formulas are expected to preserve xmm7 - confirm. | |
| function doHybridDESSE2(PIteration3D: TPIteration3D): Double; //result in st(0) new ext version | |
| asm | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi //x = edi-32 y = edi-24 .. Rold = edi - 48, Rstop = edi - 40, (i = edi + 212 = btmp = esi - 44) | |
| mov edi, eax | |
| lea esi, eax + 256 | |
| movupd xmm6, [edi] //Iteration3D by calcMissed not aligned16?! | |
| movsd xmm7, [edi + 16] | |
| movupd [edi - 32], xmm6 //X=C | |
| movupd [edi - 16], xmm7 | |
| cmp dword [esi - 104], 0 //DoJulia:+152 | |
| jz @sjup | |
| movupd xmm2, [esi + 64] | |
| movsd xmm3, [esi + 80] | |
| movupd [edi + 24], xmm2 //J=Ju | |
| movsd [edi + 40], xmm3 | |
| jmp @skipIfJulia | |
| @sjup: | |
| movupd [edi + 24], xmm6 //J=C | |
| movsd [edi + 40], xmm7 | |
| @skipIfJulia: | |
| // Initial Rout = x*x + y*y + z*z; RStop widened to double. | |
| mulpd xmm6, xmm6 | |
| mulsd xmm7, xmm7 | |
| CVTSS2SD xmm5, [edi + 72] //RStop in double | |
| addsd xmm7, xmm6 | |
| shufpd xmm6, xmm6, 1 | |
| movsd [edi - 40], xmm5 | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd [esi - 64], xmm7 //OTrap=Rout | |
| movsd [edi + 56], xmm7 //Rout | |
| movsd [edi - 48], xmm7 //Rold := Rout | |
| xor ebx, ebx //n:=0 | |
| mov [edi + 208], ebx //mov [esi - 48], ebx //bFirstIt := 0; +208 | |
| mov [edi + 64], ebx //ItresultI:=0 +64 | |
| movzx ebx, word [esi - 66] //n := iStartFrom | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar[n] +100 | |
| mov [edi + 48], eax //PVars: +48 | |
| mov eax, [edi + ebx * 4 + 76] //i:=nHybrid[n] +76 | |
| and eax, $7FFFFFFF | |
| mov [esi - 44], eax | |
| // Choose the initial w (and the derivative slots) from bits 3..5 of DEoption. | |
| mov eax, [esi - 96] //DEoption +160 | |
| and eax, $38 // case (DEoption and $38) of | |
| sub eax, 16 | |
| jnz @UU1 | |
| fld qword [edi + 56] // 16: w := Rout; | |
| jmp @UU2 | |
| @UU1: | |
| sub eax, 16 | |
| jnz @UU | |
| fld1 | |
| fstp qword [esi - 24] // deriv1 | |
| fldz // 32: begin Deriv1 := 1; Deriv2 := 0; Deriv3 := 0; end; | |
| fst qword [esi - 16] | |
| fst qword [esi - 8] | |
| jmp @UU2 | |
| @UU: | |
| fld1 // else w := 1; | |
| @UU2: | |
| fstp qword [edi - 8] //w := Rout,1,0 | |
| @Repeat: | |
| movsd [edi - 48], xmm7 //Rold := Rout | |
| cmp dword [esi - 44], 0 | |
| jnle @up2 | |
| @While: | |
| // Advance to the next formula slot, wrapping to iRepeatFrom past wEndTo; slots whose | |
| // masked count is zero are skipped. | |
| inc ebx | |
| cmp bx, word [esi - 106] //5 wEndTo: Word; //+150 | |
| jle @up3 | |
| movzx ebx, word [esi - 68] //n := iRepeatFrom | |
| @up3: | |
| mov eax, [edi + ebx * 4 + 76] //i := nHybrid[n]; +76 | |
| and eax, $7FFFFFFF | |
| jle @While | |
| mov [esi - 44], eax | |
| mov eax, [edi + ebx * 4 + 100] //fHPVar:array[0..5] of Pointer; | |
| mov [edi + 48], eax //PVars: +48 | |
| @up2: | |
| // Arguments: eax=@x, edx=@y, ecx=@z, stack = @w then PIteration3D. | |
| lea eax, edi - 8 //was: esp + 24 w | |
| push eax | |
| push edi | |
| lea edx, edi - 24 //was: esp + 16 y | |
| lea ecx, edi - 16 //was: esp + 24 z | |
| add eax, -24 // x | |
| call [edi + ebx * 4 + 124] //fHybrid[0..5] of ThybridIteration2; //+124 error in called function sometimes!!! | |
| // Operand sizes made explicit: the counter and nHybrid[n] are accessed as dwords | |
| // everywhere else in this routine. | |
| dec dword [esi - 44] //Dec(i) //Write off...??? bug in call... of mandbox or menger??! abox as testhybrid! esi has changed? | |
| cmp dword [edi + ebx * 4 + 76], 0 | |
| jl @Repeat //SkipMaxItTest | |
| movupd xmm6, [edi - 32] | |
| movupd xmm7, [edi - 16] | |
| mulpd xmm6, xmm6 | |
| mulsd xmm7, xmm7 | |
| addsd xmm7, xmm6 | |
| shufpd xmm6, xmm6, 1 | |
| addsd xmm7, xmm6 //xmm7=Rout | |
| movsd xmm5, xmm7 | |
| minsd xmm5, qword [esi - 64] | |
| movsd [edi + 56], xmm7 //Rout | |
| movsd [esi - 64], xmm5 //OTrap | |
| inc dword [edi + 64] //Inc(ItResultI) | |
| // Stop when ItResultI >= maxIt (signed); otherwise repeat while Rout < RStop. | |
| mov eax, [edi + 64] | |
| cmp eax, [edi + 68] //maxIt: +68 | |
| jnl @out | |
| comisd xmm7, [edi - 40] //RStop | |
| jc @Repeat | |
| @out: | |
| mov eax, [esi - 96] //DEoption +160 if (DEoption and $38) = 32 then | |
| // Hex mask $38, the same one used when w was initialised above (was decimal 38). | |
| and eax, $38 | |
| sub eax, 32 | |
| jnz @JU1 | |
| fld qword [edi + 56] //rout Result := Sqrt(Rout) * 0.5 * Ln(Rout) / RoutDeriv | |
| fldln2 | |
| fld st(1) //rout,ln2,rout | |
| fyl2x //ln(rout),rout | |
| fxch | |
| fsqrt | |
| fmulp | |
| fmul cs05 | |
| fdiv qword [esi - 24] //Deriv1 | |
| jmp @UU6 | |
| @JU1: | |
| mov eax, [esi - 96] //DEoption +160 | |
| and eax, 7 | |
| sub eax, 4 | |
| jnz @UU3 //Result := Abs(X3) * Ln(Abs(X3)) / X4; | |
| fld qword [edi - 16] //X3 | |
| fabs | |
| fldln2 | |
| fld st(1) //absX3,ln2,absX3 | |
| fyl2x //ln(absX3),absX3 | |
| fmulp | |
| fdiv qword [edi - 8] //Result | |
| jmp @UU6 | |
| @UU3: | |
| sub eax, 3 | |
| jnz @UU4 //Result := Sqrt(Rout/RStop) * Power(PDouble(Integer(PVar) - 16)^, -ItResultI); | |
| mov eax, [edi + 48] | |
| fild dword [edi + 64] //ItResultI | |
| fchs //-ItresultI | |
| // Explicit qword: the value is read through a PDouble in the Pascal formula above. | |
| fld qword [eax - 16] //(Pvar-16)^ (= scale or something) | |
| // Inline Power(base, expo): t = expo * ln(base) * log2(e), result = 2^frac(t) scaled by 2^int(t). | |
| fldln2 //power function x,pow | |
| fxch | |
| fyl2x | |
| fxch | |
| fmulp | |
| fldl2e | |
| fmulp | |
| fld st(0) | |
| frndint | |
| fsub st(1), st(0) | |
| fxch | |
| f2xm1 | |
| fld1 | |
| faddp | |
| fscale | |
| fstp st(1) //end of power function | |
| fld qword [edi + 56] | |
| fdiv dword [edi + 72] //rout/rstop,pow | |
| fsqrt | |
| fmulp | |
| jmp @UU6 | |
| @UU4: // else Result := Sqrt(Rout) / Abs(X4); | |
| fld qword [edi + 56] | |
| fsqrt | |
| fld qword [edi - 8] | |
| fabs | |
| fdivp | |
| @UU6: | |
| // The result stays in st(0) across the optional smooth-iteration call. | |
| cmp byte [esi - 108], 0 //CalcSIT: +148 | |
| jz @NoCalcSITout | |
| mov eax, edi | |
| mov edx, ebx | |
| call CalcSmoothIterations //(PIt3D: TPIteration3D; n: Integer); | |
| @NoCalcSITout: | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end; | |
| // HybridItTricorn: one formula step on (x, y, z), written with x87 instructions. | |
| // Register convention: eax=@x, edx=@y, ecx=@z; PIteration3D is read from [ebp + 8]. | |
| // w is not accessed. With P = the pointer stored at [PIteration3D + 48] and | |
| // J1, J2, J3 = the doubles at PIteration3D + 24, + 32, + 40: | |
| //   z := z*x * (P-16)^ + J3 * (P-24)^ | |
| //   x := x*x - y*y - z*z + J1      (old x, y, z on the right-hand side) | |
| //   y := 2*x*y + J2 | |
| // esi and edi are saved and restored. | |
| procedure HybridItTricorn(var x, y, z, w: Double; PIteration3D: TPIteration3D); | |
| asm | |
| push esi | |
| push edi | |
| fld qword [edx] | |
| fld st(0) | |
| fmul st(0), st(1) // y*y, y | |
| fld qword [eax] // x, y*y, y | |
| mov esi, [ebp + 8] // PIteration3D | |
| fld st(0) // x, x, y*y, y | |
| fmul st(0), st(1) // x*x, x, y*y, y | |
| fld qword [ecx] // z, x*x, x, y*y, y | |
| fld st(0) | |
| mov edi, [esi + 48] | |
| fmul st(0), st(1) // z*z, z, x*x, x, y*y, y | |
| faddp st(4), st(0) // z, x*x, x, y*y+z*z, y | |
| fmul st(0), st(2) // z*x, x*x, x, y*y+z*z, y | |
| // New z: z*x scaled by the parameter at (P-16), plus J3 scaled by the parameter at (P-24). | |
| fmul qword [edi - 16] | |
| fld qword [esi + 40] | |
| fmul qword [edi - 24] | |
| faddp | |
| fstp qword [ecx] // x*x, x, y*y+z*z, y | |
| fsubrp st(2), st(0) // x, x*x-y*y-z*z, y | |
| fmulp st(2), st(0) // x*x-y*y-z*z, y*x | |
| fadd qword [esi + 24] | |
| fstp qword [eax] // y*x | |
| fadd st(0), st(0) | |
| fadd qword [esi + 32] | |
| fstp qword [edx] | |
| pop edi | |
| pop esi | |
| end; | |
| // HybridQuatSSE2: one quaternion-style formula step on (x, y, z, w) using SSE2. | |
| // Register convention: eax=@x, edx=@y, ecx=@z; PIteration3D is read from [ebp + 8]. | |
| // The routine loads 16 bytes at @x as (x,y), at @y as (y,z) and at @z as (z,w), so it | |
| // relies on x, y, z, w being four consecutive doubles in memory. | |
| // With P = the pointer stored at [PIteration3D + 48], the new values are | |
| //   x := x*x - y*y - z*z - w*w + J1 | |
| //   y := 2*(x*y + z*w) + J2 | |
| //   z := 2*(x*z + y*w * (P-16)^) + J3 | |
| //   w := 2*(y*z + x*w) + (P-24)^ + J4 | |
| // where J1, J2, J3 are the doubles at PIteration3D + 24, + 32, + 40 and J4 the double | |
| // at PIteration3D - 56. Results are written as two 16-byte stores at @x and @z. | |
| // Uses xmm0..xmm6; esi and edi are saved and restored. | |
| procedure HybridQuatSSE2(var x, y, z, w: Double; PIteration3D: TPIteration3D); | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] | |
| mov edi, [esi + 48] //PVars | |
| movupd xmm0, [eax] //x,y | |
| movupd xmm1, [ecx] //z,w | |
| movapd xmm6, xmm0 //x,y | |
| movapd xmm5, xmm1 //z,w | |
| movapd xmm3, xmm1 //z,w | |
| xorpd xmm4, xmm4 //0,0 | |
| mulpd xmm6, xmm6 //xx,yy | |
| mulpd xmm5, xmm5 //zz,ww | |
| movupd xmm2, [edx] //y,z | |
| subsd xmm4, xmm5 //-zz | |
| shufpd xmm3, xmm0, 1 //w,x | |
| shufpd xmm4, xmm5, 2 //-zz, ww | |
| mulpd xmm2, xmm0 //yx, zy | |
| addpd xmm4, xmm6 //xx-zz, yy+ww | |
| mulpd xmm0, xmm1 //xz, yw | |
| mulpd xmm3, xmm1 //wz, xw | |
| pshufd xmm6, xmm0, $4E //yw, xz | |
| pshufd xmm1, xmm4, $4E //yy+ww, xx-zz | |
| mulsd xmm6, [edi - 16] //ywMul, xz | |
| addpd xmm2, xmm3 //yx+wz, zy+xw -> y, w | |
| addsd xmm6, xmm0 //ywMul + xz -> z | |
| subpd xmm4, xmm1 //xx-zz-yy-ww -> x | |
| addpd xmm2, xmm2 //y,w | |
| addsd xmm6, xmm6 //z | |
| shufpd xmm2, xmm2, 1 //w, y | |
| movupd xmm5, [esi + 24] //J1,J2 | |
| // The low lane of xmm2 is w: add the parameter at (P-24) and J4. | |
| addsd xmm2, [edi - 24] | |
| addsd xmm2, [esi - 56] //+J4 | |
| shufpd xmm6, xmm2, 0 //z, w | |
| shufpd xmm4, xmm2, 2 //x, y | |
| addsd xmm6, [esi + 40] //+J3 | |
| addpd xmm4, xmm5 //+J1,2 | |
| movupd [eax], xmm4 | |
| movupd [ecx], xmm6 | |
| pop edi | |
| pop esi | |
| end; | |
| // HybridItIntPow2: one power-2 formula step on (x, y, z), written with x87 instructions. | |
| // Register convention: eax=@x, edx=@y, ecx=@z; PIteration3D is read from [ebp + 8]. | |
| // w is not accessed. With r2 = x*x + y*y, a = (r2 - z*z) / r2, P = the pointer stored | |
| // at [PIteration3D + 48] and J1, J2, J3 = the doubles at PIteration3D + 24, + 32, + 40: | |
| //   z := 2 * Sqrt(r2) * (P-16)^ * z + J3 | |
| //   x := a * (x*x - y*y) + J1      (old x, y, z on the right-hand side) | |
| //   y := 2 * a * x * y + J2 | |
| // There is no guard for r2 = 0 before the division. | |
| // esi and edi are saved and restored. | |
| procedure HybridItIntPow2(var x, y, z, w: Double; PIteration3D: TPIteration3D); //sine bulb | |
| asm | |
| push esi | |
| push edi | |
| fld qword [ecx] | |
| fld qword [edx] | |
| fld qword [eax] //x,y,z | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld st(1) //y,x,y,z | |
| fmul st(0), st(2) // y*y,x,y,z | |
| fld st(1) // x,y*y,x,y,z | |
| fmul st(0), st(2) // x*x, y*y,x,y,z | |
| fld st(0) // x*x, x*x, y*y,x,y,z | |
| fadd st(0), st(2) // xx+yy, xx, yy,x,y,z | |
| fld st(0) // xx+yy, xx+yy, xx, yy,x,y,z | |
| fsqrt | |
| mov edi, [esi + 48] | |
| fmul qword [edi - 16] //*dOption1=Zmul | |
| fmul st(0), st(6) //*z | |
| fadd st(0), st(0) //*2 | |
| fadd qword [esi + 40] //+cz nly for test | |
| fstp qword [ecx] //xx+yy, xx, yy,x,y,z | |
| fld st(5) //z, xx+yy, xx, yy,x,y,z | |
| fmulp st(6), st(0) //xx+yy, xx, yy,x,y,z*z | |
| fld st(0) //xx+yy, xx+yy, xx, yy,x,y,z*z | |
| fsubrp st(6), st(0) //xx+yy, xx, yy,x,y, a - z*z | |
| fdivp st(5), st(0) //xx, yy,x,y, a - z*z / a = a | |
| fsubrp //xx-yy,x,y, a | |
| fmul st(0), st(3) //a(xx-yy),x,y, a | |
| fadd qword [esi + 24] | |
| fstp qword [eax] //x,y, a | |
| fmulp | |
| fmulp //x*y*a | |
| fadd st(0), st(0) //*2 | |
| fadd qword [esi + 32] //+ cy only for test | |
| fstp qword [edx] | |
| pop edi | |
| pop esi //SineP2 | |
| end; | |
| procedure HybridItIntPow2SSE2(var x, y, z, w: Double; PIteration3D: TPIteration3D); { SSE2 power-2 step. With S1 = x*x, S2 = y*y, S3 = z*z and XT = (S1+S2-S3)/(S1+S2): x' = (S1-S2)*XT+J1, y' = 2*x*y*XT+J2, z' = 2*z*dZmul*sqrt(S1+S2)+J3. w is not touched. NOTE(review): S1+S2 is used as divisor without any epsilon. } | |
| asm | |
| push esi | |
| push ebx | |
| mov esi, [ebp + 8] { esi = PIteration3D (stack parameter) } | |
| movlpd xmm0, [eax] // x | |
| movhpd xmm0, [edx] // x, y | |
| movlpd xmm1, [ecx] // z | |
| movapd xmm2, xmm0 { copy of x, y for squaring } | |
| mov ebx, [esi + 48] //Pvars | |
| movsd xmm3, xmm1 { copy of z for squaring } | |
| mulpd xmm2, xmm2 // S1, S2 | |
| mulsd xmm3, xmm3 // S3 | |
| pshufd xmm5, xmm2, $4E // S2, S1 | |
| movapd xmm4, xmm5 { keep S2 in the low half for the subtraction below } | |
| addpd xmm5, xmm2 // S1+S2 in both halves | |
| subsd xmm2, xmm4 // S1-S2 | |
| movapd xmm6, xmm5 { xmm6 = S1+S2, becomes XT } | |
| mulsd xmm1, [ebx - 16] // z*dZmul | |
| sqrtsd xmm4, xmm6 // Sqrt(S2+S1) | |
| addsd xmm1, xmm1 // z*dZmul*2 | |
| subsd xmm6, xmm3 // (S1+S2)-S3 | |
| mulsd xmm1, xmm4 // z*dZmul*2*Sqrt(S2+S1) | |
| movsd xmm3, [edx] // y | |
| addsd xmm1, [esi + 40] // z*dZmul*2*Sqrt(S2+S1)+J3 = z | |
| divsd xmm6, xmm5 // ((S1+S2)-S3)/(S1+S2) = XT | |
| addsd xmm3, xmm3 // y*2 | |
| movsd [ecx], xmm1 // z | |
| mulsd xmm3, xmm0 // y*2*x | |
| mulsd xmm2, xmm6 // (S1-S2)*XT | |
| mulsd xmm3, xmm6 // y*2*x*XT | |
| addsd xmm2, [esi + 24] // (S1-S2)*XT+J1 = x | |
| addsd xmm3, [esi + 32] // y*2*x*XT+J2 = y | |
| movsd [eax], xmm2 // x | |
| movsd [edx], xmm3 // y | |
| pop ebx | |
| pop esi | |
| end; | |
| procedure HybridFloatPow(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Real-exponent power step in spherical form on the x87 FPU. theta = atan2(y, x) and phi = atan2(z, sqrt(x*x+y*y)) are both multiplied by pow = [PVars-16]; the new radius is [PIteration3D+56] raised to pow*[PVars-8]; then x' = r*cos(theta)*cos(phi)+J1, y' = r*sin(theta)*cos(phi)+J2, z' = r*sin(phi)*[PVars-24]+J3. w is not touched. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov edi, [esi + 48] { edi = PVars pointer from PIteration3D+48 } | |
| fld qword [edi - 16] { pow } | |
| fld qword [edx] { y, pow } | |
| fld qword [eax] { x, y, pow } | |
| fld st(1) { y, x, y, pow } | |
| fld st(1) { x, y, x, y, pow } | |
| fpatan //theta, x, y, pow | |
| fmul st, st(3) { theta*pow } | |
| fsincos //Costheta, Sintheta, x, y, pow | |
| fld qword [ecx] //z,Costheta, Sintheta,x,y,pow | |
| fxch st(3) //x,Costheta, Sintheta,z,y,pow | |
| fmul st, st { xx } | |
| fxch st(4) //y,Costheta, Sintheta,z,xx,pow | |
| fmul st, st { yy } | |
| faddp st(4), st //Costheta,Sintheta,z,xx+yy,pow | |
| fxch st(2) //z,Sintheta,Costheta,xx+yy,pow | |
| fxch //Sintheta,z,Costheta,xx+yy,pow | |
| fxch st(3) //xx+yy,z,Costheta,Sintheta,pow | |
| fsqrt | |
| fpatan //phi,Costheta, Sintheta,pow | |
| fmul st, st(3) { phi*pow } | |
| fsincos //Cosphi,Sinphi,Costheta,Sintheta,pow | |
| fxch st(4) //pow,Sinphi,Costheta,Sintheta,Cosphi | |
| fmul qword [edi - 8] //*0.5 because of Rout=sqr(R) | |
| fld qword [esi + 56] //SqrRadius, pow*0.5,Sinphi,Costheta,Sintheta,Cosphi | |
| fldln2 //power function x,pow: computes x^pow as 2^(pow*log2(x)) | |
| fxch | |
| fyl2x { ln(2)*log2(x) = ln(x) } | |
| fxch | |
| fmulp { pow*ln(x) } | |
| fldl2e | |
| fmulp { pow*ln(x)*log2(e) = pow*log2(x) } | |
| fld st(0) | |
| frndint { rounded part of the exponent } | |
| fsub st(1), st(0) { st(1) = remaining fractional part } | |
| fxch | |
| f2xm1 { 2^frac - 1 } | |
| fld1 | |
| faddp { 2^frac } | |
| fscale { 2^frac * 2^rounded } | |
| fstp st(1) //NewRadius,Sinphi,Costheta,Sintheta,Cosphi | |
| fxch st(2) //Costheta,Sinphi,NewRadius,Sintheta,Cosphi | |
| fmul st, st(4) { *Cosphi } | |
| fmul st, st(2) { *NewRadius } | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] //store x; stack: Sinphi,NewRadius,Sintheta,Cosphi | |
| fxch st(3) //Cosphi,NewRadius,Sintheta,Sinphi | |
| fmulp st(2), st //NewRadius,Sintheta*Cosphi,Sinphi | |
| fmul st(1), st { st(1) = Sintheta*Cosphi*r } | |
| fmulp st(2), st //Sintheta*Cosphi*r,Sinphi*r | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] { store y } | |
| fmul qword [edi - 24] { Sinphi*r times the factor at PVars-24 } | |
| fadd qword [esi + 40] { + J3 } | |
| fstp qword [ecx] { store z; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridItIntPow3(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Integer power 3 step on the x87 FPU. With sx = x*x, sy = y*y, sz = z*z, R = sx+sy and A = 1 - 3*sz/(R + [PVars+24]): x' = x*A*(sx-3*sy)+J1, z' = J3 - z*dZmul*(sz-3*R), y' = y*A*(3*sx-sy)+J2. w is not touched. The value at PVars+24 is presumably a tiny epsilon guarding the division - TODO confirm. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [edx] { y } | |
| fmul st, st // y*y | |
| fld qword [eax] // x, y*y | |
| mov edi, [esi + 48] // PVars | |
| fmul st, st // x*x, y*y | |
| fld st(0) // x*x, x*x, y*y | |
| fadd st(0), st(2) // x*x+y*y = R, x*x = sx, y*y = sy | |
| fld qword [ecx] { z, R, sx, sy } | |
| fmul st, st // sz, R, sx, sy | |
| fld qword [edi + 120] // 3, sz, R, sx, sy | |
| fld st(1) // sz, 3, sz, R, sx, sy | |
| fmul st(0), st(1) // 3*sz, 3, sz, R, sx, sy | |
| fld st(3) { R, 3*sz, 3, sz, R, sx, sy } | |
| fadd qword [edi + 24] { R + constant at PVars+24 } | |
| fdivp // 3*sz/R, 3, sz, R, sx, sy | |
| fld1 | |
| fsubrp { A = 1 - 3*sz/R; stack: A, 3, sz, R, sx, sy } | |
| fld st(1) // 3, A, 3, sz, R, sx, sy | |
| fmul st(0), st(6) // 3*sy, .. | |
| fsubr st(0), st(5) // sx-3*sy, .. | |
| fmul st(0), st(1) // A*(sx-3*sy), A, 3, sz, R, sx, sy | |
| fmul qword [eax] { *x } | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] // store x; stack: A, 3, sz, R, sx, sy | |
| fxch st(4) // sx, 3, sz, R, A, sy | |
| fmul st(0), st(1) // 3*sx, 3, sz, R, A, sy | |
| fsubrp st(5), st(0) // 3, sz, R, A, 3*sx-sy was: sy-3*sx! | |
| fmulp st(2), st(0) // sz, 3*R, A, 3*sx-sy | |
| fsubrp // sz-3*R, A, 3*sx-sy | |
| fmul qword [ecx] { *z } | |
| fmul qword [edi - 16] //*dZmul | |
| fsubr qword [esi + 40] { J3 - z*dZmul*(sz-3*R) } | |
| fstp qword [ecx] // store z; stack: A, 3*sx-sy | |
| fmulp // A*(3*sx-sy) | |
| fmul qword [edx] { *y (y has not been overwritten yet) } | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] { store y; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridItIntPow4(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Integer power 4 step on the x87 FPU. With sx = x*x, sy = y*y, sz = z*z, R = sx+sy and A = 1 + sz*(sz-6*R)/(R*R + [PVars+24]): x' = A*(sy*sy + sx*(sx-6*sy))+J1, z' = 4*dZmul*z*sqrt(R)*(R-sz)+J3, y' = 4*x*y*A*(sx-sy)+J2. The original x is kept on the FPU stack because [eax] is overwritten before y' is computed. w is not touched. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [edx] { y } | |
| fmul st, st // y*y | |
| fld qword [eax] // x, y*y | |
| mov edi, [esi + 48] // PVars | |
| fld st { x, x, y*y } | |
| fmul st, st // x*x, x, y*y | |
| fld st(0) // x*x, x*x, x, y*y | |
| fadd st(0), st(3) // x*x+y*y = R, sx, x, sy | |
| fld qword [ecx] { z, R, sx, x, sy } | |
| fmul st, st // sz, R, sx, x, sy | |
| fld qword [edi + 144] // 6, sz, R, sx, x, sy | |
| fmul st, st(2) // 6*R, sz, R, sx, x, sy | |
| fsubr st, st(1) // sz - 6*R, sz, R, sx, x, sy | |
| fmul st(0), st(1) // sz * (sz - 6 * R), sz, R, sx, x, sy | |
| fld st(2) { R on top } | |
| fmul st, st // R*R, sz * (sz - 6 * R), sz, R, sx, x, sy | |
| fadd qword [edi + 24] // 24-112 +1e-40 | |
| fdivp // sz * (sz - 6 * R) / R*R, sz, R, sx, x, sy | |
| fld1 | |
| faddp // A, sz, R, sx, x, sy | |
| fld st(5) // sy, A, sz, R, sx, x, sy | |
| fmul qword [edi + 144] // 6*sy, A, sz, R, sx, x, sy | |
| fsubr st, st(4) // sx-6*sy, A, sz, R, sx, x, sy | |
| fmul st(0), st(4) // sx*(sx-6*sy), A, sz, R, sx, x, sy | |
| fld st(6) { sy on top } | |
| fmul st, st { sy*sy } | |
| faddp // sy*sy + sx*(sx-6*sy), A, sz, R, sx, x, sy | |
| fmul st, st(1) { *A } | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] // store x; stack: A, sz, R, sx, x, sy | |
| fxch st(2) // R, sz, A, sx, x, sy | |
| fsubr st(1), st // R, R-sz, A, sx, x, sy | |
| fsqrt | |
| fmulp // sqrt(R)*(R-sz), A, sx, x, sy | |
| fmul qword [ecx] { *z } | |
| fmul qword [edi + 128] //*4 | |
| fmul qword [edi - 16] //*dZmul | |
| fadd qword [esi + 40] { + J3 } | |
| fstp qword [ecx] // store z; stack: A, sx, x, sy | |
| fxch // sx, A, x, sy y := 4 * x * y * A * (sx - sy) + J2; | |
| fsubrp st(3), st // A, x, sx-sy | |
| fmulp // A*x, sx-sy | |
| fmulp { A*x*(sx-sy) } | |
| fmul qword [edi + 128] //*4 | |
| fmul qword [edx] //*y | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] { store y; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridIntP5(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Integer power 5 step on the x87 FPU. With sx = x*x, sy = y*y, sz = z*z, R = sx+sy and A = 1 + 5*(sz*sz - 2*sz*R)/(R*R + [PVars+24]): y' = y*A*(5*sx*sx - sy*(10*sx-sy))+J2, z' = z*dZmul*(sz*(sz-10*R) + 5*R*R)+J3, x' = x*A*(sx*(sx-10*sy) + 5*sy*sy)+J1. y is stored first, x last; w is not touched. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [edx] { y } | |
| fmul st, st // y*y | |
| fld qword [eax] // x, y*y | |
| mov edi, [esi + 48] // PVars | |
| fmul st, st // x*x, y*y | |
| fld st // x*x, x*x, y*y | |
| fadd st, st(2) // x*x+y*y = R, sx, sy | |
| fld qword [ecx] { z, R, sx, sy } | |
| fmul st, st // sz, R, sx, sy | |
| fld qword [edi + 136] // 5, sz, R, sx, sy | |
| fld st // 5, 5, sz, R, sx, sy | |
| fld st(2) { sz on top } | |
| fmul st, st(4) { sz*R } | |
| fadd st, st // sz*R*2, 5, 5, sz, R, sx, sy | |
| fld st(3) { sz on top } | |
| fmul st, st { sz*sz } | |
| fsubrp // sz*sz - sz*R*2, 5, 5, sz, R, sx, sy | |
| fmulp // (sz*sz - sz*R*2) * 5, 5, sz, R, sx, sy | |
| fld st(3) { R on top } | |
| fmul st, st // R*R, (sz*sz - sz*R*2) * 5, 5, sz, R, sx, sy | |
| fadd qword [edi + 24] // 24-112 +1e-40 | |
| fdivp // (sz*sz - sz*R*2) * 5 / R*R, 5, sz, R, sx, sy | |
| fld1 | |
| faddp // A, 5, sz, R, sx, sy | |
| fld st(4) // sx, A, 5, sz, R, sx, sy | |
| fmul qword [edi + 168] // 10*sx, A, 5, sz, R, sx, sy | |
| fsub st, st(6) // 10*sx - sy, A, 5, sz, R, sx, sy | |
| fmul st, st(6) // sy*(10*sx - sy), A, 5, sz, R, sx, sy | |
| fld st(5) { sx on top } | |
| fmul st, st { sx*sx } | |
| fmul st, st(3) // 5*sx*sx, sy*(10*sx - sy), A, 5, sz, R, sx, sy | |
| fsubrp // 5*sx*sx - sy*(10*sx - sy), A, 5, sz, R, sx, sy | |
| fmul st, st(1) { *A } | |
| fmul qword [edx] { *y } | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] // store y; stack: A, 5, sz, R, sx, sy | |
| fld st(3) { R on top } | |
| fmul st, st(2) { 5*R } | |
| fadd st, st // 10*R, A, 5, sz, R, sx, sy | |
| fsubr st, st(3) // sz-10*R, A, 5, sz, R, sx, sy | |
| fmulp st(3), st // A, 5, sz*(sz-10*R), R, sx, sy | |
| fxch st(3) // R, 5, sz*(sz-10*R), A, sx, sy | |
| fmul st, st // R*R | |
| fmul st, st(1) // 5*R*R | |
| faddp st(2), st // 5, sz*(sz-10*R)+5*R*R, A, sx, sy | |
| fld st(4) { sy on top } | |
| fmul st, st { sy*sy } | |
| fmul st, st(1) // 5*sy*sy, 5, sz*(sz-10*R)+5*R*R, A, sx, sy | |
| fxch st(5) // sy, 5, sz*(sz-10*R)+5*R*R, A, sx, 5*sy*sy | |
| fmulp { 5*sy } | |
| fadd st, st // 10*sy, sz*(sz-10*R)+5*R*R, A, sx, 5*sy*sy | |
| fsubr st, st(3) // sx-10*sy, sz*(sz-10*R)+5*R*R, A, sx, 5*sy*sy | |
| fmulp st(3), st // sz*(sz-10*R)+5*R*R, A, sx*(sx-10*sy), 5*sy*sy | |
| fmul qword [ecx] { *z } | |
| fmul qword [edi - 16] //*dZmul | |
| fadd qword [esi + 40] { + J3 } | |
| fstp qword [ecx] // store z; stack: A, sx*(sx-10*sy), 5*sy*sy | |
| fmul qword [eax] { A*x } | |
| fxch // sx*(sx-10*sy), A*x, 5*sy*sy | |
| faddp st(2), st // A*x, sx*(sx-10*sy)+5*sy*sy | |
| fmulp | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] { store x; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridIntP6(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Integer power 6 step on the x87 FPU. With sx = x*x, sy = y*y, sz = z*z, R = sx+sy and A = 1 - sz*(15*R*R + sz*(sz-15*R))/(R*R*R + [PVars+24]): y' = 2*x*y*A*(3*sy*sy + sx*(3*sx-10*sy))+J2, z' = 2*dZmul*z*sqrt(R)*(sz*(3*sz-10*R) + 3*R*R)+J3, x' = A*(sx*sx*(sx-15*sy) + sy*sy*(15*sx-sy))+J1. edi is biased by +112 so every constant is addressed as [edi + offset-112]. w is not touched. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [edx] { y } | |
| fmul st, st // y*y | |
| fld qword [eax] // x, y*y | |
| mov edi, [esi + 48] // PVars | |
| fmul st, st // x*x, y*y | |
| fld st // x*x, x*x, y*y | |
| fadd st, st(2) // x*x+y*y = R, sx, sy | |
| fld qword [ecx] { z, R, sx, sy } | |
| add edi, 112 { bias the base pointer; offsets below are written as N-112 } | |
| fmul st, st // sz, R, sx, sy | |
| fld qword [edi + 176-112] // 15, sz, R, sx, sy | |
| fld st // 15, 15, sz, R, sx, sy | |
| fmul st, st(3) // 15*R, | |
| fsubr st, st(2) // sz-R*15, 15, sz, R, sx, sy | |
| fmul st, st(2) { sz*(sz-R*15) } | |
| fld st(3) { R on top } | |
| fmul st, st // R*R, sz*(sz-R*15), 15, sz, R, sx, sy | |
| fxch | |
| fld st(1) // R*R, sz*(sz-R*15), R*R, 15, sz, R, sx, sy | |
| fmulp st(3), st // sz*(sz-R*15), R*R, 15*R*R, sz, R, sx, sy | |
| faddp st(2), st // R*R, 15*R*R+sz*(sz-R*15), sz, R, sx, sy | |
| fxch // 15*R*R+sz*(sz-R*15), R*R, sz, R, sx, sy | |
| fmul st, st(2) // sz*(15*R*R+sz*(sz-R*15)), R*R, sz, R, sx, sy | |
| fld st(1) { R*R on top } | |
| fmul st, st(4) // R*R*R, sz*(15*R*R+sz*(sz-R*15)), R*R, sz, R, sx, sy | |
| fadd qword [edi + 24-112] // 24-112 +1e-40 | |
| fdivp // sz*(15*R*R+sz*(sz-R*15)) / R*R*R, R*R, sz, R, sx, sy | |
| fld1 | |
| fsubrp // A = 1 - sz*(15*R*R+sz*(sz-R*15)) / R*R*R, R*R, sz, R, sx, sy | |
| fld st(5) // sy, A, R*R, sz, R, sx, sy | |
| fmul qword [edi + 168-112] // 10*sy, A, R*R, sz, R, sx, sy | |
| fld st(5) { sx on top } | |
| fmul qword [edi + 120-112] // 3*sx, 10*sy, A, R*R, sz, R, sx, sy | |
| fsubrp // 3*sx-10*sy, A, R*R, sz, R, sx, sy | |
| fmul st, st(5) // sx*(3*sx-10*sy), A, R*R, sz, R, sx, sy | |
| fld st(6) // sy, | |
| fmul st, st { sy*sy } | |
| fmul qword [edi + 120-112] // 3*sy*sy, sx*(3*sx-10*sy), A, R*R, sz, R, sx, sy | |
| faddp // 3*sy*sy+sx*(3*sx-10*sy), A, R*R, sz, R, sx, sy | |
| fmul st, st(1) { *A } | |
| fmul qword [edx] { *y } | |
| fmul qword [eax] // *x. Reference formula for z (computed further down): z := PDouble(Integer(PVar) - 16)^*2*z*Sqrt(R)*(sz*(3*sz - 10*R) + 3*R*R) + J3; | |
| fadd st, st // *2. Reference formula for x (computed further down): x := A*(S1*S1*(S1 - 15*S2) + S2*S2*(15*S1 - S2)) + J1; | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] // store y; stack: A, R*R, sz, R, sx, sy | |
| fld st(3) { R on top } | |
| fmul qword [edi + 168-112] { 10*R } | |
| fld st(3) // sz, 10*R, A, R*R, sz, R, sx, sy | |
| fmul qword [edi + 120-112] { 3*sz } | |
| fsubrp // 3*sz-10*R, A, R*R, sz, R, sx, sy | |
| fmulp st(3), st // A, R*R, sz*(3*sz-10*R), R, sx, sy | |
| fxch | |
| fmul qword [edi + 120-112] // 3*R*R, A, sz*(3*sz-10*R), R, sx, sy | |
| faddp st(2), st // A, sz*(3*sz-10*R)+3*R*R, R, sx, sy | |
| fld qword [edi + 176-112] // 15 | |
| fld st // 15, 15, A, sz*(3*sz-10*R)+3*R*R, R, sx, sy | |
| fmul st, st(5) { 15*sx } | |
| fsub st, st(6) { 15*sx - sy } | |
| fmul st, st(6) { *sy } | |
| fmul st, st(6) // S2*S2*(15*S1-S2), 15, A, sz*(3*sz-10*R)+3*R*R, R, sx, sy | |
| fxch | |
| fmulp st(6), st // S2*S2*(15*S1-S2), A, sz*(3*sz-10*R)+3*R*R, R, sx, 15*sy | |
| fxch st(5) // 15*sy, A, sz*(3*sz-10*R)+3*R*R, R, sx, S2*S2*(15*S1-S2) | |
| fsubr st, st(4) // sx-15*sy, A, sz*(3*sz-10*R)+3*R*R, R, sx, S2*S2*(15*S1-S2) | |
| fmul st, st(4) { sx*(sx-15*sy) } | |
| fmulp st(4), st // A, sz*(3*sz-10*R)+3*R*R, R, sx*sx*(sx-15*sy), S2*S2*(15*S1-S2) | |
| fxch st(4) // S2*S2*(15*S1-S2), sz*(3*sz-10*R)+3*R*R, R, sx*sx*(sx-15*sy), A | |
| faddp st(3), st // sz*(3*sz-10*R)+3*R*R, R, S2*S2*(15*S1-S2)+sx*sx*(sx-15*sy), A | |
| fxch | |
| fsqrt { sqrt(R) } | |
| fmulp // (sz*(3*sz-10*R)+3*R*R)*sqrt(R), S2*S2*(15*S1-S2)+sx*sx*(sx-15*sy), A | |
| fmul qword [ecx] { *z } | |
| fmul qword [edi - 16-112] //*dZmul | |
| fadd st, st { *2 } | |
| fadd qword [esi + 40] { + J3 } | |
| fstp qword [ecx] // store z; stack: S2*S2*(15*S1-S2)+sx*sx*(sx-15*sy), A | |
| fmulp | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] { store x; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridIntP7(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Integer power 7 step on the x87 FPU. With sx = x*x, sy = y*y, sz = z*z, R = sx+sy and A = 1 - 7*sz*(3*R*R + sz*(sz-5*R))/(R*R*R + [PVars+24]): y' = y*A*(sx*(21*sy*sy + sx*(7*sx-35*sy)) - sy*sy*sy)+J2; z' and x' follow the reference formulas quoted in the comments below. edi is biased by +112 so every constant is addressed as [edi + offset-112]. The y part uses all eight x87 registers at its deepest point. w is not touched. } | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [edx] { y } | |
| fmul st, st // y*y | |
| fld qword [eax] // x, y*y | |
| mov edi, [esi + 48] // PVars | |
| fmul st, st // x*x, y*y | |
| fld st // x*x, x*x, y*y | |
| fadd st, st(2) // x*x+y*y = R, sx, sy | |
| fld qword [ecx] { z, R, sx, sy } | |
| add edi, 112 { bias the base pointer; offsets below are written as N-112 } | |
| fmul st, st // sz, R, sx, sy | |
| fld st(1) // R, sz, R, sx, sy | |
| fmul qword [edi + 136-112] // 5R, | |
| fsubr st, st(1) // sz-5R, sz, R, sx, sy | |
| fmul st, st(1) // sz(sz-5R), sz, R, sx, sy | |
| fld st(2) { R on top } | |
| fmul st, st // R*R, sz(sz-5R), sz, R, sx, sy | |
| fxch // sz(sz-5R), R*R, sz, R, sx, sy | |
| fld st(1) // R*R, sz(sz-5R), R*R, sz, R, sx, sy | |
| fmul qword [edi + 120-112] // 3*R*R, sz(sz-5R), R*R, sz, R, sx, sy | |
| faddp // 3RR+sz(sz-5R), R*R, sz, R, sx, sy | |
| fmul st, st(2) // sz(3RR+sz(sz-5R)), R*R, sz, R, sx, sy | |
| fmul qword [edi + 152-112] { *7 } | |
| fld st(1) { R*R on top } | |
| fmul st, st(4) // R*R*R, 7sz(3RR+sz(sz-5R)), R*R, sz, R, sx, sy | |
| fadd qword [edi + 24-112] // 24-112 +1e-40 | |
| fdivp // 7sz(3RR+sz(sz-5R))/RRR, R*R, sz, R, sx, sy | |
| fld1 | |
| fsubrp // A, R*R, sz, R, sx, sy | |
| fld st(5) // sy, A, R*R, sz, R, sx, sy | |
| fmul qword [edi + 200-112] // 35*sy, A, R*R, sz, R, sx, sy | |
| fld st(5) { sx on top } | |
| fmul qword [edi + 152-112] // 7*sx, 35*sy, A, R*R, sz, R, sx, sy | |
| fsubrp // 7*sx-35*sy, A, R*R, sz, R, sx, sy | |
| fmul st, st(5) // sx*(7*sx-35*sy), A, R*R, sz, R, sx, sy | |
| fld st(6) // sy, | |
| fmul st, st { sy*sy } | |
| fmul qword [edi + 184-112] // 21*sy*sy, sx*(7*sx-35*sy), A, R*R, sz, R, sx, sy | |
| faddp // 21sysy+sx(7sx-35sy), A, R*R, sz, R, sx, sy | |
| fmul st, st(5) { *sx } | |
| fld st(6) { sy on top } | |
| fmul st, st { sy*sy } | |
| fmul st, st(7) // sysysy, sx(21sysy+sx(7sx-35sy)), A, R*R, sz, R, sx, sy | |
| fsubp // sx(21sysy+sx(7sx-35sy))-sysysy, A, R*R, sz, R, sx, sy | |
| fmul st, st(1) { *A } | |
| fmul qword [edx] { *y } | |
| fadd qword [esi + 32] { + J2 } | |
| fstp qword [edx] // store y; stack: A, R*R, sz, R, sx, sy | |
| fmul qword [eax] // A*x. Reference formula for z: z := J3 - PDouble(Integer(PVar) - 16)^*z*(sz*sz*sz - 7*R*(sz*(3*sz - 5*R) + R*R)); | |
| // x := A*x*(sx*(sx*(sx - 21*sy) + 35*sy*sy) - 7*sy*sy*sy) + J1; | |
| fld st(3) { R on top } | |
| fmul qword [edi + 136-112] // 5R, A*x, R*R, sz, R, sx, sy | |
| fld st(3) // sz, 5R, A*x, R*R, sz, R, sx, sy | |
| fmul qword [edi + 120-112] { 3*sz } | |
| fsubrp // 3sz-5R, A*x, R*R, sz, R, sx, sy | |
| fmul st, st(3) // sz(3sz-5R), A*x, R*R, sz, R, sx, sy | |
| faddp st(2), st // A*x, RR+sz(3sz-5R), sz, R, sx, sy | |
| fxch | |
| fmul qword [edi + 152-112] // 7(RR+sz(3sz-5R)), A*x, sz, R, sx, sy | |
| fmulp st(3), st // A*x, sz, 7R(sz(3sz-5R)+RR), sx, sy | |
| fld st(1) { sz on top } | |
| fmul st, st { sz*sz } | |
| fmulp st(2), st // A*x, szszsz, 7R(RR+sz(3sz-5R)), sx, sy | |
| fxch // szszsz, A*x, 7R(RR+sz(3sz-5R)), sx, sy | |
| fsubrp st(2), st // A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fld st(3) // sy, A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fmul qword [edi + 184-112] // 21sy, A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fsubr st, st(3) // sx-21sy, A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fmul st, st(3) // sx(sx-21sy) | |
| fld st(4) // sy, sx(sx-21sy), A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fmul st, st // sysy, sx(sx-21sy), A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fmul qword [edi + 200-112] // 35sysy, sx(sx-21sy), A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| faddp // 35sysy+sx(sx-21sy), A*x, szszsz-7R(RR+sz(3sz-5R)), sx, sy | |
| fmulp st(3), st // A*x, szszsz-7R(RR+sz(3sz-5R)), sx(35sysy+sx(sx-21sy)), sy | |
| fxch st(3) // sy, szszsz-7R(RR+sz(3sz-5R)), sx(35sysy+sx(sx-21sy)), A*x | |
| fld st { sy, sy, ... } | |
| fmul st, st { sy*sy } | |
| fmulp { sy*sy*sy } | |
| fmul qword [edi + 152-112] // 7sysysy, szszsz-7R(RR+sz(3sz-5R)), sx(35sysy+sx(sx-21sy)), A*x | |
| fsubp st(2), st // szszsz-7R(RR+sz(3sz-5R)), sx(35sysy+sx(sx-21sy))-7sysysy, A*x | |
| fmul qword [ecx] { *z } | |
| fmul qword [edi - 16-112] //*dZmul | |
| fsubr qword [esi + 40] { J3 - product } | |
| fstp qword [ecx] // store z; stack: sx(35sysy+sx(sx-21sy))-7sysysy, A*x | |
| fmulp | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] { store x; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridIntP8(var x, y, z, w: Double; PIteration3D: TPIteration3D); //P8 white's formula, x87 version. With r = xx+yy: z' = J3 - 8*dZmul*z*sqrt(r)*(zz-r)*(zzzz-6*r*zz+rr); a = 1 + (zzzz*(70*rr+zzzz) - 28*zz*r*(zzzz+rr))/(rrrr + eps); y' = 8*x*y*a*(yyyy*(7xx-yy)+xxxx*(xx-7yy))+J2; x' = a*(xxxx*xxxx + yyyy*(70xxxx+yyyy) - 28*xx*yy*(yyyy+xxxx))+J1. edi is biased by +88, so [edi+N] addresses PVars+88+N. w is not touched. | |
| asm | |
| push esi | |
| push edi | |
| mov esi, [ebp + 8] //PIteration3D | |
| fld qword [eax] //x | |
| mov edi, [esi + 48] //PVars | |
| fmul st(0), st(0) //xx | |
| fld qword [edx] //y | |
| add edi, 88 { bias the base pointer; constants below are relative to PVars+88 } | |
| fmul st(0), st(0) //yy,xx | |
| fld qword [ecx] //z,yy,xx | |
| fmul st(0), st(0) //zz,yy,xx | |
| fld st(2) //xx,zz,yy,xx | |
| fadd st(0), st(2) //xx+yy=r,zz,yy,xx | |
| fld st(0) //r,r,zz,yy,xx | |
| fmul st(0), st(1) //rr,r,zz,yy,xx | |
| fld st(2) { zz on top } | |
| fmul st(0), st(0) //zzzz(S3*S3),rr,r,zz,yy,xx | |
| fld st(2) //r,zzzz(S3*S3),rr,r,zz,yy,xx z calculation | |
| fmul st(0), st(4) //r*zz | |
| fmul qword [edi + 56] //6*r*zz,zzzz(S3*S3),rr,r,zz,yy,xx | |
| fsubr st(0), st(1) //zzzz-6rzz,zzzz,rr,r,zz,yy,xx | |
| fadd st(0), st(2) //zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx | |
| fld st(4) //zz,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx | |
| fsub st(0), st(4) //zz-r,zzzz-6rzz+rr,zzzz,rr,r,zz,yy,xx | |
| fmulp //(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx | |
| fld st(3) //r,(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx | |
| fsqrt | |
| fmulp //sqrt(r)*(zz-r)*(zzzz-6rzz+rr),zzzz,rr,r,zz,yy,xx | |
| fmul qword [ecx] //*z | |
| fmul qword [edi + 72] //*8 | |
| fmul qword [edi - 104] //*dZmul | |
| fchs | |
| fadd qword [esi + 40] //+J3 | |
| fstp qword [ecx] //store z; stack: zzzz,rr,r,zz,yy,xx | |
| fld st(0) //zzzz,zzzz,rr,r,zz,yy,xx a calculation | |
| fadd st(0), st(2) //zzzz+rr,zzzz,rr,r,zz,yy,xx | |
| fmulp st(3), st(0) //zzzz,rr,r*(zzzz+rr),zz,yy,xx | |
| fld st(1) //rr,zzzz,rr,r*(zzzz+rr),zz,yy,xx | |
| fmul qword [edi + 120] //rr*70,zzzz,rr,r*(zzzz+rr),zz,yy,xx | |
| fadd st(0), st(1) { rr*70+zzzz } | |
| fmulp //(rr*70+zzzz)*zzzz,rr,r*(zzzz+rr),zz,yy,xx | |
| fxch st(2) //r*(zzzz+rr),rr,(rr*70+zzzz)*zzzz,zz,yy,xx | |
| fmulp st(3), st(0) //rr,(rr*70+zzzz)*zzzz,zz*r*(zzzz+rr),yy,xx | |
| fxch st(2) //zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx | |
| fmul qword [edi + 104] //28*zz*r*(zzzz+rr),(rr*70+zzzz)*zzzz,rr,yy,xx | |
| fsubp //(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),rr,yy,xx | |
| fxch st(1) | |
| fmul st(0), st(0) //rrrr,(rr*70+zzzz)*zzzz-28*zz*r*(zzzz+rr),yy,xx | |
| fadd qword [edi - 64] // 24-88 +1e-40 | |
| fdivp //(zzzz*(rr*70+zzzz)-28*zz*r*(zzzz+rr))/rrrr,yy,xx | |
| fadd qword [edi - 56] //a,yy,xx +1 | |
| fld st(1) //yy,a,yy,xx y calculation | |
| fmul qword [edi + 64] //7*yy,a,yy,xx (edi+64 is PVars+152, the constant 7) | |
| fld st(3) //xx,7*yy,a,yy,xx | |
| fmul qword [edi + 64] //7*xx,7*yy,a,yy,xx | |
| fsub st(0), st(3) //7*xx-yy,7*yy,a,yy,xx | |
| fld st(4) //xx,7*xx-yy,7*yy,a,yy,xx | |
| fsubr st(2), st(0) //xx,7*xx-yy,xx-7*yy,a,yy,xx | |
| fmul st(0), st(0) //xxxx,7*xx-yy,xx-7*yy,a,yy,xx | |
| fmul st(2), st(0) //xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx | |
| fld st(4) //yy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx | |
| fmul st(0), st(0) //yyyy,xxxx,7xx-yy,xxxx(xx-7yy),a,yy,xx | |
| fmul st(2), st(0) //yyyy,xxxx,yyyy(7xx-yy),xxxx(xx-7yy),a,yy,xx | |
| fxch st(2) //yyyy(7xx-yy),xxxx,yyyy,xxxx(xx-7yy),a,yy,xx | |
| faddp st(3), st(0) //xxxx,yyyy,yyyy(7xx-yy)+xxxx(xx-7yy),a,yy,xx | |
| fxch st(2) //yyyy(7xx-yy)+xxxx(xx-7yy),yyyy,xxxx,a,yy,xx | |
| fmul qword [edi + 72] //*8 | |
| fmul qword [eax] //*x | |
| fmul qword [edx] //*y | |
| fmul st(0), st(3) //*a | |
| fadd qword [esi + 32] //+J2 | |
| fstp qword [edx] //store y; stack: yyyy,xxxx,a,yy,xx | |
| fld st(1) //xxxx,yyyy,xxxx,a,yy,xx | |
| fmul qword [edi + 120] //70xxxx,yyyy,xxxx,a,yy,xx | |
| fadd st(0), st(1) //70xxxx+yyyy,yyyy,xxxx,a,yy,xx | |
| fmul st(0), st(1) //yyyy(70xxxx+yyyy),yyyy,xxxx,a,yy,xx | |
| fxch st(1) //yyyy,yyyy(70xxxx+yyyy),xxxx,a,yy,xx | |
| fadd st(0), st(2) //yyyy+xxxx,yyyy(70xxxx+yyyy),xxxx,a,yy,xx | |
| fmulp st(4), st(0) //yyyy(70xxxx+yyyy),xxxx,a,yy(yyyy+xxxx),xx | |
| fxch st(4) //xx,xxxx,a,yy(yyyy+xxxx),yyyy(70xxxx+yyyy) | |
| fmulp st(3), st(0) //xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy) | |
| fmul st(0), st(0) //xxxx*xxxx,a,xxyy(yyyy+xxxx),yyyy(70xxxx+yyyy) | |
| faddp st(3), st(0) //a,xxyy(yyyy+xxxx),xxxx*xxxx+yyyy(70xxxx+yyyy) | |
| fxch st(1) //xxyy(yyyy+xxxx),a,xxxx*xxxx+yyyy(70xxxx+yyyy) | |
| fmul qword [edi + 104] { *28 } | |
| fsubp st(2), st(0) //a,xxxx*xxxx+yyyy(70xxxx+yyyy)-28xxyy(yyyy+xxxx) | |
| fmulp | |
| fadd qword [esi + 24] { + J1 } | |
| fstp qword [eax] { store x; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridCubeSSE2(var x, y, z, w: Double; PIteration3D: TPIteration3D); // is used in alt hybrid without DE on w. SSE2 box fold: each of x,y,z becomes 2*clamp(v, [PVars-64], [PVars-48]) - v; then with r2 = x*x+y*y+z*z the vector is multiplied by [PVars-24] if r2 < [PVars-32], by [PVars-16]/r2 if r2 < [PVars+32], otherwise by [PVars-16]; finally J1..J3 are added. x and y are loaded as one pair from [eax], so y is assumed to follow x in memory; w is not touched. | |
| asm | |
| push esi | |
| push ebx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov ebx, [esi + 48] { ebx = PVars pointer from PIteration3D+48 } | |
| movupd xmm2, [eax] //[x,y] | |
| movsd xmm4, [ecx] //[z] | |
| movapd xmm0, xmm2 | |
| maxpd xmm0, [ebx - 64] //const:-1,-1,1,1 (lower clamp pair; packed memory operand, so this address must be 16-byte aligned) | |
| maxsd xmm4, [ebx - 64] | |
| minpd xmm0, [ebx - 48] { upper clamp pair } | |
| minsd xmm4, [ebx - 48] | |
| addpd xmm0, xmm0 { 2*clamp(x), 2*clamp(y) } | |
| addsd xmm4, xmm4 { 2*clamp(z) } | |
| subpd xmm0, xmm2 { minus the original x, y } | |
| subsd xmm4, [ecx] { minus the original z } | |
| movapd xmm1, xmm0 //x, y | |
| movsd xmm5, xmm4 | |
| mulpd xmm1, xmm1 //x*x, y*y | |
| mulsd xmm5, xmm5 //z*z | |
| pshufd xmm6, xmm1, $4E //y*y, x*x copies and swaps hi<>lo | |
| addsd xmm1, xmm5 //x*x + z*z | |
| addsd xmm1, xmm6 // r2 = squared radius of the folded point (original note: w = sqr(r)) | |
| ucomisd xmm1, [ebx - 32] //<dOption2 was:dOpt3 | |
| jnb @u1 | |
| movsd xmm3, [ebx - 24] //dOption1 | |
| jmp @u3 | |
| @u1:ucomisd xmm1, [ebx + 32] //<1 ? ucomisd slow? | |
| movsd xmm3, [ebx - 16] //dPow = scale //Was:dOpt2 (movsd leaves the flags of the compare intact) | |
| jnb @u3 | |
| divsd xmm3, xmm1 { scale / r2 } | |
| @u3:shufpd xmm3, xmm3, 0 | |
| movupd xmm5, [esi + 24] //[J1,J2] | |
| mulpd xmm0, xmm3 | |
| mulsd xmm4, xmm3 | |
| addpd xmm0, xmm5 | |
| addsd xmm4, [esi + 40] //J3 | |
| movlpd [eax], xmm0 { store x } | |
| movhpd [edx], xmm0 { store y } | |
| movsd [ecx], xmm4 { store z } | |
| pop ebx | |
| pop esi | |
| end; | |
| procedure HybridCube(var x, y, z, w: Double; PIteration3D: TPIteration3D); { x87 box fold: each of x,y,z becomes abs(v+fold) - abs(v-fold) - v with fold = [PVars-40]; then with r = x*x+y*y+z*z the vector is multiplied by [PVars-24] if r < [PVars-32], by [PVars-16]/r if r <= 1, otherwise by [PVars-16]; finally J1..J3 are added. eax is copied to ebx because fnstsw ax overwrites it. w is not touched. } | |
| asm | |
| push esi //Amazing box x87 with options fold fold x2 | |
| push ebx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov esi, [esi + 48] //was:PAligned16 (esi now points at PVars) | |
| mov ebx, eax { keep the x pointer; ax is clobbered by fnstsw below } | |
| fld qword [esi - 40] //fold | |
| fld qword [eax] //x,fold | |
| fld st(0) //x,x,fold folding with x = abs(x+fold) - abs(x-fold) - x | |
| fsub st(0), st(2) { x-fold } | |
| fabs | |
| fadd st(0), st(1) //abs(x-fold)+x,x,fold | |
| fxch //x,abs(x-fold)+x,fold | |
| fadd st(0), st(2) { x+fold } | |
| fabs | |
| fsubrp //abs(x+fold)-(abs(x-fold)+x),fold | |
| fld qword [edx] //y,x',fold (x' = folded x) | |
| fld st(0) | |
| fsub st(0), st(3) { y-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch | |
| fadd st(0), st(3) { y+fold } | |
| fabs | |
| fsubrp { y',x',fold } | |
| fld qword [ecx] //z,y',x',fold | |
| fld st(0) | |
| fsub st(0), st(4) { z-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch | |
| fadd st(0), st(4) { z+fold } | |
| fabs | |
| fsubrp //z,y,x,fold (all three folded) | |
| fld st(0) //z,z,y,x,fold | |
| fmul st(0), st(1) { z*z } | |
| fld st(2) //y,z*z,z,y,x,fold | |
| fmul st(0), st(3) { y*y } | |
| faddp //y*y+z*z,z,y,x,fold | |
| fld st(3) //x,y*y+z*z,z,y,x,fold | |
| fmul st(0), st(4) { x*x } | |
| faddp //r,z,y,x,fold with r = x*x+y*y+z*z | |
| fcom qword [esi - 32] { compare r with the threshold at PVars-32 } | |
| fnstsw ax | |
| shr ah, 1 { carry = C0, set when r is below the threshold } | |
| jnc @@7 | |
| fstp st(0) { drop r } | |
| fld qword [esi - 24] { multiplier for the small-radius case } | |
| jmp @@9 | |
| @@7: //r,z,y,x,fold | |
| fld1 | |
| fcom st(1) { compare 1 with r } | |
| fnstsw ax | |
| shr ah, 1 { carry set when 1 < r } | |
| jc @@8 | |
| fstp st(0) { drop the 1 } | |
| fdivr qword [esi - 16] { [PVars-16] / r } | |
| jmp @@9 | |
| @@8: | |
| fcompp { drop both 1 and r } | |
| fld qword [esi - 16] | |
| @@9: | |
| fmul st(3), st(0) //mul,z,y,x*mul,fold | |
| fmul st(2), st(0) | |
| fmulp //z*mul,y*mul,x*mul,fold | |
| mov esi, [ebp + 8] { esi = PIteration3D again, for J1..J3 } | |
| fadd qword [esi + 40] | |
| fstp qword [ecx] | |
| fadd qword [esi + 32] | |
| fstp qword [edx] | |
| fadd qword [esi + 24] | |
| fstp qword [ebx] | |
| fstp st(0) { drop fold; FPU stack is empty again } | |
| mov eax, ebx { restore the x pointer in eax } | |
| pop ebx | |
| pop esi | |
| end; | |
| procedure HybridCubeDE(var x, y, z, w: Double; PIteration3D: TPIteration3D); { x87 box fold like abs(v+fold) - abs(v-fold) - v on x,y,z with fold = [PVars-40], followed by the radius-dependent multiplier ([PVars-24] if r < [PVars-32], [PVars-16]/r if r <= 1, else [PVars-16]) and the addition of J1..J3. In addition the double at [ecx+8] is multiplied by the same factor; the original comment calls it w, which assumes w directly follows z in memory. eax is copied to ebx because fnstsw ax overwrites it. } | |
| asm | |
| push esi //Amazing box without adding c x87 with option fold | |
| push ebx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov ebx, eax { keep the x pointer; ax is clobbered by fnstsw below } | |
| mov esi, [esi + 48] //was:PAligned16 (esi now points at PVars) | |
| fld qword [esi - 40] //fold | |
| fld st(0) | |
| fchs //-fold,fold | |
| fld qword [ebx] //x,-fold,fold | |
| fld st(0) //x,x,-fold,fold folding with x = abs(x+fold) - abs(x-fold) - x | |
| fadd st(0), st(2) { x-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch //x,abs(x-fold)+x,-fold,fold | |
| fadd st(0), st(3) { x+fold } | |
| fabs | |
| fsubrp //abs(x+fold)-(abs(x-fold)+x),-fold,fold | |
| fld qword [edx] //y,x,-fold,fold | |
| fld st(0) | |
| fadd st(0), st(3) { y-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch | |
| fadd st(0), st(4) { y+fold } | |
| fabs | |
| fsubrp | |
| fld qword [ecx] //z,y,x,-fold,fold | |
| fld st(0) | |
| fadd st(0), st(4) { z-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch | |
| fadd st(0), st(5) { z+fold } | |
| fabs | |
| fsubrp { z,y,x,-fold,fold (all three folded) } | |
| fld st(0) //z,z,y,x,-fold,fold | |
| fmul st(0), st(1) { z*z } | |
| fld st(2) //y,z*z,z,y,x,-fold,fold | |
| fmul st(0), st(3) { y*y } | |
| faddp //y*y+z*z,z,y,x,-fold,fold | |
| fld st(3) //x,y*y+z*z,z,y,x,-fold,fold | |
| fmul st(0), st(4) { x*x } | |
| faddp //r,z,y,x,-fold,fold | |
| fcom qword [esi - 32] { compare r with the threshold at PVars-32 } | |
| fnstsw ax | |
| shr ah, 1 { carry = C0, set when r is below the threshold } | |
| jnc @@7 | |
| fstp st(0) { drop r } | |
| fld qword [esi - 24] { multiplier for the small-radius case } | |
| jmp @@9 | |
| @@7: //r,z,y,x,-fold,fold | |
| fld1 | |
| fcom st(1) { compare 1 with r } | |
| fnstsw ax | |
| shr ah, 1 { carry set when 1 < r } | |
| jc @@8 | |
| fstp st(0) { drop the 1 } | |
| fdivr qword [esi - 16] { [PVars-16] / r } | |
| jmp @@9 | |
| @@8: | |
| fcompp { drop both 1 and r } | |
| fld qword [esi - 16] | |
| @@9: | |
| fld qword [ecx + 8] //w,mul,zr,yr,xr,-fold,fold | |
| fmul st(0), st(1) { w*mul } | |
| fstp qword [ecx + 8] | |
| fmul st(3), st(0) //mul,zr,yr,xr,-fold,fold | |
| fmul st(2), st(0) | |
| fmulp //zr,yr,xr,-fold,fold | |
| mov esi, [ebp + 8] { esi = PIteration3D again, for J1..J3 } | |
| fadd qword [esi + 40] | |
| fstp qword [ecx] | |
| fadd qword [esi + 32] | |
| fstp qword [edx] | |
| fadd qword [esi + 24] | |
| fstp qword [ebx] | |
| fcompp { pops -fold and fold; the comparison result is not used } | |
| mov eax, ebx { restore the x pointer in eax } | |
| pop ebx | |
| pop esi | |
| end; | |
| procedure HybridCubeSSE2DE(var x, y, z, w: Double; PIteration3D: TPIteration3D); { SSE2 box fold: each of x,y,z becomes 2*clamp(v, [PVars-64], [PVars-48]) - v; with r2 = x*x+y*y+z*z the multiplier is [PVars-24] if r2 < [PVars-32], [PVars-16]/r2 if r2 < [PVars+32], else [PVars-16]. x,y,z and the double at [ecx+8] are multiplied by it and J1..J3 are added to x,y,z. Loads and stores are packed 16-byte accesses at [eax] and [ecx], so y is assumed to follow x and [ecx+8] (presumably w) to follow z in memory. } | |
| asm | |
| push esi | |
| push ebx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov ebx, [esi + 48] //was:PAligned16 (ebx = PVars pointer) | |
| movupd xmm2, [eax] //[x,y] | |
| movsd xmm4, [ecx] //[z] | |
| movapd xmm0, xmm2 | |
| maxpd xmm0, [ebx - 64] //const:-R,-R,R,R (lower clamp pair; packed memory operand, so this address must be 16-byte aligned) | |
| maxsd xmm4, [ebx - 64] | |
| minpd xmm0, [ebx - 48] { upper clamp pair } | |
| minsd xmm4, [ebx - 48] | |
| addpd xmm0, xmm0 { 2*clamp(x), 2*clamp(y) } | |
| addsd xmm4, xmm4 { 2*clamp(z) } | |
| subpd xmm0, xmm2 { minus the original x, y } | |
| subsd xmm4, [ecx] { minus the original z } | |
| movapd xmm1, xmm0 //x, y | |
| movsd xmm5, xmm4 | |
| mulpd xmm1, xmm1 //x*x, y*y | |
| mulsd xmm5, xmm5 //z*z | |
| pshufd xmm2, xmm1, $4E //y*y, x*x copies and swaps hi<>lo | |
| addsd xmm1, xmm5 | |
| addsd xmm1, xmm2 // r2 = squared radius of the folded point (original note: w = sqr(r)) | |
| ucomisd xmm1, [ebx - 32] //<dOption2 //7/6 clocks ucomisd latency :-( | |
| movsd xmm3, [ebx - 24] //dOption1 (loaded before the branch; movsd leaves the flags intact) | |
| jb @u3 | |
| ucomisd xmm1, [ebx + 32] //<1 ? | |
| movsd xmm3, [ebx - 16] //dPow = scale | |
| jnb @u3 | |
| divsd xmm3, xmm1 { scale / r2 } | |
| @u3: | |
| movhpd xmm4, [ecx + 8] { high half = double following z, so xmm4 = z, [ecx+8] } | |
| shufpd xmm3, xmm3, 0 //r, r | |
| movupd xmm5, [esi + 24] //[J1,J2] | |
| mulpd xmm0, xmm3 | |
| mulpd xmm4, xmm3 | |
| addpd xmm0, xmm5 | |
| addsd xmm4, [esi + 40] //J3 | |
| movupd [eax], xmm0 { store x,y as one 16-byte pair } | |
| movupd [ecx], xmm4 { store z and the scaled double at [ecx+8] } | |
| pop ebx | |
| pop esi | |
| end; | |
| procedure HybridItIntPow2scale(var x, y, z, w: Double; PIteration3D: TPIteration3D); //sine bulb with scaling. x,y,z are first divided by s = [PVars-72]; then with R = xx+yy and a = (R - zz)/R: z' = -2*z*sqrt(R)*s+J3, x' = a*(xx-yy)*s+J1, y' = 2*x*y*a*s+J2. Note the register roles: here edi = PIteration3D and esi = PVars. w is not touched. NOTE(review): R is used as divisor without any epsilon. | |
| asm | |
| push esi | |
| push edi | |
| mov edi, [ebp + 8] { edi = PIteration3D (stack parameter) } | |
| mov esi, [edi + 48] { esi = PVars pointer } | |
| fld qword [ecx] { z } | |
| fld qword [edx] { y, z } | |
| fld qword [eax] //x,y,z | |
| fld qword [esi - 72] // scaling | |
| fld1 | |
| fdivrp { 1/scaling, x, y, z } | |
| fmul st(3), st(0) { z/scaling } | |
| fmul st(2), st(0) { y/scaling } | |
| fmulp { x/scaling; stack: x,y,z (all scaled) } | |
| fld st(1) //y,x,y,z | |
| fmul st(0), st(2) // y*y,x,y,z | |
| fld st(1) // x,y*y,x,y,z | |
| fmul st(0), st(2) // x*x, y*y,x,y,z | |
| fld st(0) // x*x, x*x, y*y,x,y,z | |
| fadd st(0), st(2) // xx+yy, xx, yy,x,y,z | |
| fld st(0) // xx+yy, xx+yy, xx, yy,x,y,z | |
| fsqrt | |
| fmul st(0), st(6) //*z | |
| fadd st(0), st(0) //*2 | |
| fchs | |
| fmul qword [esi - 72] { *scaling } | |
| fadd qword [edi + 40] { + J3 } | |
| fstp qword [ecx] //store z; stack: xx+yy, xx, yy,x,y,z | |
| fld st(5) //z, xx+yy, xx, yy,x,y,z | |
| fmulp st(6), st(0) //xx+yy, xx, yy,x,y,z*z | |
| fld st(0) //xx+yy, xx+yy, xx, yy,x,y,z*z | |
| fsubrp st(6), st(0) //xx+yy, xx, yy,x,y, (xx+yy) - z*z | |
| fdivp st(5), st(0) //xx, yy,x,y, a  where a = ((xx+yy) - z*z) / (xx+yy) | |
| fsubrp //xx-yy,x,y, a | |
| fmul st(0), st(3) //a(xx-yy),x,y, a | |
| fmul qword [esi - 72] { *scaling } | |
| fadd qword [edi + 24] { + J1 } | |
| fstp qword [eax] //store x; stack: x,y, a | |
| fmulp | |
| fmulp //x*y*a | |
| fadd st(0), st(0) //*2 | |
| fmul qword [esi - 72] { *scaling } | |
| fadd qword [edi + 32] { + J2 } | |
| fstp qword [edx] { store y; FPU stack is empty again } | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridFolding(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Folds x, y and z in place with v' = abs(v+fold) - abs(v-fold) - v, fold = [PVars-24], then calls the routine whose address is stored at [PVars-52]. eax, edx and ecx are left unchanged and both stack parameters are pushed again, so the callee receives the same five arguments. No esp adjustment follows the call, so the code relies on the callee removing its two stack arguments. } | |
| asm | |
| push esi | |
| push edi | |
| push ebx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov edi, [esi + 48] { edi = PVars pointer from PIteration3D+48 } | |
| fld qword [edi - 24] //fold | |
| fld qword [eax] //x,fold | |
| fld st(0) //x,x,fold folding with x = abs(x+fold) - abs(x-fold) - x | |
| fsub st(0), st(2) { x-fold } | |
| fabs | |
| fadd st(0), st(1) //abs(x-fold)+x,x,fold | |
| fxch //x,abs(x-fold)+x,fold | |
| fadd st(0), st(2) { x+fold } | |
| fabs | |
| fsubrp //abs(x+fold)-(abs(x-fold)+x),fold | |
| fstp qword [eax] { store folded x; stack: fold } | |
| fld qword [edx] //y,fold | |
| fld st(0) | |
| fsub st(0), st(2) { y-fold } | |
| fabs | |
| fadd st(0), st(1) | |
| fxch | |
| fadd st(0), st(2) { y+fold } | |
| fabs | |
| fsubrp | |
| fstp qword [edx] { store folded y; stack: fold } | |
| fld qword [ecx] //z,fold | |
| fld st(0) | |
| fsub st(0), st(2) //z-fold,z,fold | |
| fabs | |
| fadd st(0), st(1) //z+abs(z-fold),z,fold | |
| fxch st(2) { fold,z,z+abs(z-fold) } | |
| faddp //z+fold,z+abs(z-fold) (fold is consumed here) | |
| fabs | |
| fsubrp //z' | |
| fstp qword [ecx] { store folded z; FPU stack is empty again } | |
| mov ebx, [ebp + 12] { the other stack parameter (the w pointer under the register calling convention) } | |
| push ebx | |
| push esi | |
| call [edi - 52] { chained routine, address read from PVars-52 } | |
| pop ebx | |
| pop edi | |
| pop esi | |
| end; | |
| procedure HybridCustomIFS; //Empty asm body, no instructions of its own. Original note: for IFS, different calling convention! esi+edi is @it3dext.x+128 and @Pvar (presumably esi and edi respectively - TODO confirm against the caller) | |
| asm | |
| end; | |
| procedure AexionC(var x, y, z, w: Double; PIteration3D: TPIteration3D); { Converts (x, y, z) to two angles and a squared radius, scales the angles by a power, raises the radius to a power, writes the new x, y, z, and optionally rewrites the three doubles at [esi + 24..40] the same way. } | |
| asm { eax = @x, edx = @y, ecx = @z; PIteration3D is read from [ebp + 8]; w is not referenced } | |
| push esi | |
| push edi | |
| push ebx | |
| push ecx | |
| mov esi, [ebp + 8] //PIteration3D | |
| mov edi, [esi + 48] { pointer stored at PIteration3D + 48; all [edi - n] operands below are read relative to it } | |
| fld qword [ecx] | |
| fld qword [edx] | |
| fld qword [eax] //x,y,z | |
| fld st(1) | |
| fmul st, st //yy,x,y,z | |
| fxch st(2) //y,x,yy,z | |
| fld st(3) | |
| fmul st, st //zz,y,x,yy,z | |
| fld st(2) | |
| fmul st, st //xx,zz,y,x,yy,z | |
| fld st(1) //zz,xx,zz,y,x,yy,z | |
| fadd st, st(1) | |
| faddp st(5), st //xx,zz,y,x,r1,z (r1 = xx+yy+zz, the squared length) | |
| faddp | |
| fsqrt //sqrt(xx+zz),y,x,r1,z | |
| fxch | |
| fpatan //th,x,r1,z (th = atan2(sqrt(xx+zz), y)) | |
| fxch st(3) | |
| fxch //x,z,r1,th | |
| fpatan //ph,r1,th (ph = atan2(z, x)) | |
| fld qword [edi - 16] //pow,ph,r1,th | |
| fmul st(3), st | |
| fmul st(1), st | |
| fmul qword [edi - 8] //pow*0.5,ph,r1,th (the factor is whatever is stored at [edi - 8]) | |
| fxch //ph,pow',r1,th | |
| fxch st(2) //r1,pow',ph,th | |
| fldln2 //power function base,expo -> st, st(1) | |
| fxch | |
| fyl2x | |
| fxch | |
| fmulp | |
| fldl2e | |
| fmulp | |
| fld st(0) | |
| frndint | |
| fsub st(1), st(0) | |
| fxch | |
| f2xm1 | |
| fld1 | |
| faddp | |
| fscale | |
| fstp st(1) //r1',ph,th (r1' = r1 raised to pow') | |
| fxch st(2) //th, ph, r1 | |
| fsincos //ct,st, ph, r1 | |
| fxch st(2) //ph, st,ct, r1 | |
| fsincos //cosP,sinP, sinT,cosT, r1 | |
| fmul st, st(3) | |
| fmul st, st(4) | |
| fadd qword [esi + 24] | |
| fstp qword [eax] //sinP, sinT,cosT, r1 (x = cosP*cosT*r1 + [esi+24]) | |
| fmulp st(2), st //sinT,cosT*SinP, r1 | |
| fmul st, st(2) | |
| fmul qword [edi - 24] | |
| fadd qword [esi + 40] | |
| fstp qword [ecx] //cosT*SinP, r1 (z = sinT*r1*[edi-24] + [esi+40]) | |
| fmulp | |
| fadd qword [esi + 32] | |
| fstp qword [edx] { y = cosT*sinP*r1 + [esi+32]; the FPU stack is empty here } | |
| cmp dword [edi - 28], 0 { when this flag is zero the rewrite of [esi + 24..40] is skipped } | |
| jz @@1 | |
| fld qword [edi - 40] //pd^ | |
| cmp dword [edi - 52], 0 | |
| jz @@2 | |
| fld qword [eax] { optional: multiply pd by the distance between the new (x, y, z) and ([esi+24], [esi+32], [esi+40]) } | |
| fsub qword [esi + 24] | |
| fmul st, st | |
| fld qword [edx] | |
| fsub qword [esi + 32] | |
| fmul st, st | |
| faddp | |
| fld qword [ecx] | |
| fsub qword [esi + 40] | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| fmulp | |
| @@2: //pd^ | |
| fld qword [esi + 24] | |
| fmul st, st | |
| fld qword [esi + 32] | |
| fmul st, st | |
| faddp | |
| fld qword [esi + 40] | |
| fmul st, st | |
| faddp | |
| fsqrt //r1, pd^ (r1 = length of the vector at [esi + 24..40]) | |
| mov ebx, [edi - 56] | |
| test ebx, 16 //Mode bit1: Flip atan theta, 2: Flip atan phi, 3: Flip theta and phi, 4: Flip CxCy, 5: diffs | |
| jz @@4 // r,y z,x y<>x/z y<>x | |
| fld qword [eax] { bit 5 set: use the differences between the new position and the vector at [esi + 24..40] } | |
| fsub qword [esi + 24] | |
| fld qword [ecx] | |
| fsub qword [esi + 40] | |
| fld qword [edx] | |
| fsub qword [esi + 32] | |
| jmp @@5 | |
| @@4: | |
| fld qword [esi + 24] | |
| fld qword [esi + 40] | |
| fld qword [esi + 32] | |
| @@5: //Cy, Cz, Cx, r1, pd^ | |
| xor eax, eax //offset for cond phi test, normally x, or z if only flip-at2 (eax no longer holds @x from here on) | |
| xor ecx, ecx | |
| add ecx, 8 | |
| test ebx, 8 //Mode bit4: flip CYCX | |
| jz @@6 | |
| fxch //st(2) //(Cx,Cz,Cy) | |
| add ecx, 8 //(Cz,Cx,Cy) test: Flip Cy<>Cz | |
| @@6: //y, z, x, r1, pd^ | |
| fld st(1) | |
| fmul st, st | |
| fld st(3) | |
| fmul st, st | |
| faddp //xx+zz, y, z, x, r1, pd^ | |
| fsqrt //sqrt(sqr(j1)+sqr(j3)), y, z, x, r1, pd | |
| test ebx, 1 //flip AT theta | |
| jnz @@8 | |
| fxch | |
| @@8: | |
| fpatan //th, Cz, Cx, r1, pd | |
| fxch st(2) //Cx, Cz, th, r1, pd | |
| test ebx, 2 //flip AT phi | |
| jz @@9 | |
| fxch | |
| add eax, 24 | |
| sub eax, ecx | |
| @@9: | |
| fpatan //ph, th, r1, pd | |
| test ebx, 4 | |
| jz @@7 | |
| fxch | |
| mov eax, ecx | |
| @@7: | |
| cmp dword [edi - 32], 0 | |
| jz @@10 | |
| test dword [edx + eax - 4], $80000000 { tests the top bit of the dword at @y + eax - 4; presumably the sign of a neighbouring coordinate - TODO confirm layout } | |
| jnz @@10 | |
| fchs | |
| @@10: | |
| fmul st, st(3) | |
| fxch st(3) //pd, th, r1, ph | |
| fmulp //th, r1, ph | |
| fsincos //costh,sinth,r1,ph | |
| fxch st(3) //ph,sinth,r1,costh | |
| fsincos //Cx,Sx,Sy,r1,Cy | |
| fmul st, st(4) | |
| fmul st, st(3) | |
| fstp qword [esi + 24] //Sx,Sy,r1,Cy | |
| fmulp st(3), st //Sy,r1,Cy*Sx | |
| fmul st, st(1) | |
| fmul qword [edi - 48] | |
| fstp qword [esi + 40] //r1,Cy*Sx | |
| fmulp | |
| fstp qword [esi + 32] | |
| @@1: | |
| pop ecx | |
| pop ebx | |
| pop edi | |
| pop esi | |
| end; | |
| procedure TCrc32Stream.add(var data; datasize:longint); assembler; register; { Folds datasize bytes starting at data into the running CRC kept in curcrc, one CrcTable lookup per byte. } | |
| asm { eax = Self, edx = @data, ecx = datasize; every general register is saved and restored } | |
| pushad | |
| mov edi, eax { edi = Self } | |
| mov esi, edx { esi = read cursor } | |
| jecxz @done { nothing to do for a zero byte count } | |
| mov edx, [TCrc32Stream(edi).curcrc] | |
| mov ebx, OFFSET CrcTable { table base is loop-invariant } | |
| cld { lodsb must walk upwards } | |
| @lp1: | |
| xor eax, eax | |
| lodsb | |
| xor al, dl { table index = data byte xor low CRC byte } | |
| shr edx, 8 | |
| xor edx, [ebx + eax * 4] | |
| loop @lp1 | |
| mov [TCrc32Stream(edi).curcrc], edx | |
| @done: | |
| popad | |
| end; | |
| function DotOf2VecNormalize(norm, light, view: TPSVec): Single; { Returns dot(light, view - 2*dot(norm, view)*norm) over the first three single components; no normalisation is performed in this routine. } | |
| asm { eax = norm, edx = light, ecx = view; the result is left in st(0) } | |
| fld dword [eax] | |
| fld dword [eax + 4] | |
| fld dword [eax + 8] //norm2, norm1, norm0 | |
| fld dword [ecx] | |
| fmul st, st(3) | |
| fld dword [ecx + 4] | |
| fmul st, st(3) | |
| faddp | |
| fld dword [ecx + 8] | |
| fmul st, st(2) | |
| faddp | |
| fadd st, st //d2, norm2, norm1, norm0 (d2 = 2 * dot(norm, view)) | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp //norm2', norm1', norm0' | |
| fsubr dword [ecx + 8] { view[2] - norm2' } | |
| fmul dword [edx + 8] | |
| fxch | |
| fsubr dword [ecx + 4] | |
| fmul dword [edx + 4] | |
| faddp | |
| fxch | |
| fsubr dword [ecx] | |
| fmul dword [edx] | |
| faddp | |
| end; | |
| procedure calcAmbshadow(var dAmbS, sAmplitude: Single; PsiLight: TPsiLight5); { Writes dAmbS from the 16-bit value at PsiLight + 12 (scaled by 1/16383, capped at 1) and the amplitude: 1 - Ampl*Shadow when Ampl < 1, otherwise d + (Ampl-1)*(d*d - d) with d = 1 - Shadow. } | |
| const s1d16383: Single = 1/16383; | |
| asm { eax = @dAmbS, edx = @sAmplitude, ecx = PsiLight } | |
| fld1 | |
| cmp word [ecx + 12], 16383 { signed 16-bit compare } | |
| jl @@2 | |
| fld1 | |
| jmp @@3 | |
| @@2: | |
| fild word [ecx + 12] | |
| fmul s1d16383 | |
| @@3: | |
| fld dword [edx] //Ampl, Shadow, 1 | |
| mov edx, eax { keep @dAmbS in edx because fnstsw overwrites ax } | |
| fcom st(2) | |
| fnstsw ax | |
| shr ah, 1 { moves C0 into the carry flag: set when Ampl < 1 } | |
| jc @@1 | |
| fxch | |
| fsubr st, st(2) //dAmbS, Ampl, 1 | |
| fxch //Ampl,dAmbS,1 | |
| fsubrp st(2), st //dAmbS,Ampl-1 | |
| fld st | |
| fmul st, st | |
| fsub st, st(1) //Sqr(dAmbS)-dAmbS,dAmbS,Ampl-1 | |
| fmulp st(2), st | |
| faddp | |
| fstp dword [edx] | |
| ret { early return; relies on the routine having no stack frame to unwind } | |
| @@1: | |
| fmulp { Ampl * Shadow, 1 } | |
| fsubp { 1 - Ampl * Shadow } | |
| fstp dword [edx] | |
| end; | |
| function SqrSV255(const sv: TSVec): TSVec; { Squares the first three components of sv, multiplies each by s1d255, and stores them through the pointer in edx. } | |
| asm { eax = @sv; edx is used as the destination of the result record } | |
| fld dword [eax] | |
| fmul st, st | |
| fld dword [eax + 4] | |
| fmul st, st | |
| fld dword [eax + 8] | |
| fmul st, st | |
| fld s1d255 { constant defined outside this routine } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp | |
| fstp [edx + 8] { no operand size is written on these three stores; NOTE(review): confirm the assembler picks a 4-byte store here } | |
| fstp [edx + 4] | |
| fstp [edx] | |
| end; | |
| function ConvertVLight(Win: Integer): Integer; { Decodes a 10-bit packed value: the low 7 bits are shifted left by the 3-bit count held in bits 7..9. } | |
| asm { eax = Win on entry; the result is returned in eax with a copy left in edx; ecx is preserved } | |
| push ecx | |
| mov ecx, eax | |
| and ecx, $3FF | |
| shr ecx, 7 { ecx = shift count, 0..7 } | |
| and eax, $7F { eax = 7-bit mantissa } | |
| shl eax, cl | |
| pop ecx | |
| mov edx, eax | |
| end; | |
| function AddSVecWeight(const SPos, SPosPlus: TSVec; const Step: Integer): TSVec; //math3d: procedure AddSVecWeight(V1, V2: TPSVec; W: Double); computes SPos + SPosPlus * Step for the first three components | |
| asm { eax = @SPos, edx = @SPosPlus, ecx = Step; the destination pointer is read from [ebp + 8] } | |
| push ecx | |
| push ebx | |
| mov ebx, [ebp + 8] | |
| mov [ebp - 4], ecx { Step must be in memory for fild; [ebp - 4] is the slot the push ecx above just occupied } | |
| fld dword [edx] | |
| fld dword [edx + 4] | |
| fld dword [edx + 8] | |
| fild dword [ebp - 4] { Step, p2, p1, p0 } | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp | |
| fadd dword [eax + 8] | |
| fstp dword [ebx + 8] | |
| fadd dword [eax + 4] | |
| fstp dword [ebx + 4] | |
| fadd dword [eax] | |
| fstp dword [ebx] | |
| pop ebx | |
| pop ecx | |
| end; | |
| procedure ScaleSVecHDR(sv1: TPSVec); { Replaces each component v by v / sqrt(sqr(0.9*v) + 1); the SSE branch processes four singles with an approximate reciprocal square root, the FPU branch processes three exactly. } | |
| const s09: Single = 0.9; | |
| asm { eax = sv1 } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm0, s09 | |
| movups xmm1, cSVec1 { constant defined outside this routine; used as the +1 term } | |
| shufps xmm0, xmm0, 0 { broadcast 0.9 to all four lanes } | |
| movups xmm2, dqword [eax] | |
| movaps xmm3, xmm2 | |
| mulps xmm2, xmm0 | |
| mulps xmm2, xmm2 | |
| addps xmm2, xmm1 | |
| rsqrtps xmm2, xmm2 | |
| mulps xmm3, xmm2 | |
| movups dqword [eax], xmm3 { writes 16 bytes, so the fourth single is rewritten too } | |
| ret | |
| @@1: | |
| fld1 | |
| fld s09 | |
| fld dword [eax] { v0, 0.9, 1 } | |
| fmul st, st(1) | |
| fmul st, st | |
| fadd st, st(2) | |
| fsqrt | |
| fdivr dword [eax] | |
| fstp dword [eax] | |
| fld dword [eax + 4] | |
| fmul st, st(1) | |
| fmul st, st | |
| fadd st, st(2) | |
| fsqrt | |
| fdivr dword [eax + 4] | |
| fstp dword [eax + 4] | |
| fld dword [eax + 8] | |
| fmulp { last component: the popping forms consume the 0.9 and 1 constants } | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| fdivr dword [eax + 8] | |
| fstp dword [eax + 8] | |
| end; | |
| procedure ScaleSingleHDR(var s: Single); { In-place s := s / sqrt(sqr(s * 0.9) + 1); the SSE branch uses the approximate rsqrtss, the FPU branch an exact square root and divide. } | |
| const s09: Single = 0.9; | |
| s1: Single = 1; | |
| asm { eax = @s } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm0, dword [eax] | |
| movss xmm1, xmm0 { keep the unscaled value for the final multiply } | |
| mulss xmm0, s09 | |
| mulss xmm0, xmm0 | |
| addss xmm0, s1 | |
| rsqrtss xmm0, xmm0 | |
| mulss xmm1, xmm0 | |
| movss dword [eax], xmm1 | |
| ret | |
| @@1: | |
| fld1 //x := x / Sqrt(Sqr(x * 0.9) + 1); | |
| fld dword [eax] | |
| fmul s09 | |
| fmul st, st | |
| faddp | |
| fsqrt | |
| fdivr dword [eax] | |
| fstp dword [eax] | |
| end; | |
| procedure ScaleSingleHDRsqr(var s: Single); { In-place s := sqrt(xx / sqrt(sqr(xx * 0.9) + 1)) with xx = s*s; the SSE branch uses the approximate rsqrtss for the inner root. } | |
| const s09: Single = 0.9; | |
| s1: Single = 1; | |
| asm { eax = @s } | |
| cmp SupportSSE, 0 | |
| jz @@1 | |
| movss xmm0, dword [eax] | |
| mulss xmm0, xmm0 | |
| movss xmm1, xmm0 { xmm1 = xx } | |
| mulss xmm0, s09 | |
| mulss xmm0, xmm0 | |
| addss xmm0, s1 | |
| rsqrtss xmm0, xmm0 | |
| mulss xmm1, xmm0 | |
| sqrtss xmm1, xmm1 | |
| movss dword [eax], xmm1 | |
| ret | |
| @@1: | |
| fld dword [eax] //x := Sqrt(x*x / Sqrt(Sqr(x*x * 0.9) + 1)); | |
| fmul st, st | |
| fld st //xx,xx | |
| fmul s09 | |
| fmul st, st | |
| fld1 | |
| faddp | |
| fsqrt | |
| fdivp { xx / sqrt(...) } | |
| fsqrt | |
| fstp dword [eax] | |
| end; | |
| procedure SVec2ColSSE(sv1: TPSVec; pc: PCardinal); { Clamps the vector to the range 0..cSVec1, multiplies by cSVec255 and writes three bytes to pc: component 2 at byte 0, component 1 at byte 1, component 0 at byte 2. } | |
| asm // eax edx (eax = sv1, edx = pc) | |
| add esp, -16 { 16 bytes of scratch for the scaled vector } | |
| movups xmm0, dqword [eax] | |
| movups xmm1, cSVec1 | |
| movups xmm2, cSVec255 | |
| xorps xmm3, xmm3 | |
| minps xmm0, xmm1 | |
| maxps xmm0, xmm3 | |
| mulps xmm0, xmm2 | |
| movups [esp], xmm0 | |
| cvtss2si eax, xmm0 { component 0 as an integer; eax no longer holds sv1 } | |
| fld dword [esp + 8] | |
| fistp word [edx] { 16-bit store covers bytes 0 and 1 } | |
| fld dword [esp + 4] | |
| fistp word [edx + 1] { covers bytes 1 and 2, replacing the upper byte of the previous store } | |
| mov [edx + 2], al { byte 2 is finally replaced by component 0; byte 3 is never written } | |
| add esp, 16 | |
| end; | |
| procedure LabCubicRootSSE(sv: TPSVec); { Four-lane in-place transform: result = w + (min(r, sftc) - sftc) * smul, where w comes from three rounds of w := w*(w*w*w + 2r) / (2*w*w*w + r) on max(r, sftc), starting at wstart. } | |
| const wstart: array[0..3] of Single = (0.4275, 0.4275, 0.4275, 0.4275); | |
| sftc: array[0..3] of Single = (216/24389, 216/24389, 216/24389, 216/24389); | |
| smul: array[0..3] of Single = (841/108, 841/108, 841/108, 841/108); | |
| asm { eax = sv; edx is used as the iteration counter } | |
| movups xmm0, [eax] //r | |
| movaps xmm4, xmm0 | |
| movups xmm6, sftc | |
| movups xmm1, wstart | |
| maxps xmm4, xmm6 { xmm4 = max(r, sftc): the value whose root is iterated } | |
| movups xmm7, smul | |
| movaps xmm5, xmm4 | |
| minps xmm0, xmm6 { xmm0 = min(r, sftc): feeds the linear term added at the end } | |
| addps xmm5, xmm5 //2r | |
| mov edx, 3 | |
| @ll: movaps xmm2, xmm1 | |
| mulps xmm2, xmm2 | |
| mulps xmm2, xmm1 //www | |
| movaps xmm3, xmm2 | |
| addps xmm3, xmm3 | |
| addps xmm2, xmm5 | |
| addps xmm3, xmm4 | |
| mulps xmm1, xmm2 | |
| divps xmm1, xmm3 | |
| dec edx | |
| jnz @ll | |
| subps xmm0, xmm6 | |
| mulps xmm0, xmm7 | |
| addps xmm0, xmm1 | |
| movups [eax], xmm0 | |
| end; | |
| procedure LabCubicRoot2SSE(sv: TPSVec); //rsqrtps less precise! Same result shape as LabCubicRootSSE, but w is iterated as w := 4/3 * rsqrt(rsqrt(w*r)) - w/3 | |
| const wstart: array[0..3] of Single = (0.3661, 0.3661, 0.3661, 0.3661); | |
| sftc: array[0..3] of Single = (216/24389, 216/24389, 216/24389, 216/24389); | |
| smul: array[0..3] of Single = (841/108, 841/108, 841/108, 841/108); | |
| s1d3: array[0..3] of Single = (1/3, 1/3, 1/3, 1/3); | |
| s4d3: array[0..3] of Single = (4/3, 4/3, 4/3, 4/3); | |
| asm { eax = sv; edx is used as the iteration counter } | |
| movups xmm0, [eax] //r | |
| movaps xmm4, xmm0 | |
| movups xmm6, sftc | |
| movups xmm1, wstart | |
| movups xmm5, s4d3 | |
| movups xmm3, s1d3 | |
| maxps xmm4, xmm6 { xmm4 = max(r, sftc) } | |
| movups xmm7, smul | |
| minps xmm0, xmm6 { xmm0 = min(r, sftc) } | |
| mov edx, 3 | |
| @ll: movaps xmm2, xmm1 | |
| mulps xmm1, xmm4 //w*r | |
| rsqrtps xmm1, xmm1 | |
| mulps xmm2, xmm3 | |
| rsqrtps xmm1, xmm1 { two reciprocal square roots in a row give approximately the fourth root of w*r } | |
| mulps xmm1, xmm5 | |
| subps xmm1, xmm2 | |
| dec edx | |
| jnz @ll | |
| subps xmm0, xmm6 | |
| mulps xmm0, xmm7 | |
| addps xmm0, xmm1 | |
| movups [eax], xmm0 | |
| end; | |
| procedure LabPow3SSE(sv: TPSVec); { Four-lane in-place transform: result = max(r, sftc) cubed + (min(r, sftc) - sftc) * smul. } | |
| const sftc: array[0..3] of Single = (6/29, 6/29, 6/29, 6/29); | |
| smul: array[0..3] of Single = (108/841, 108/841, 108/841, 108/841); | |
| asm { eax = sv } | |
| movups xmm0, [eax] //r | |
| movaps xmm4, xmm0 | |
| movups xmm6, sftc | |
| maxps xmm4, xmm6 | |
| movups xmm7, smul | |
| movaps xmm5, xmm4 | |
| minps xmm0, xmm6 | |
| mulps xmm5, xmm5 | |
| subps xmm0, xmm6 | |
| mulps xmm5, xmm4 //rrr | |
| mulps xmm0, xmm7 | |
| addps xmm0, xmm5 | |
| movups [eax], xmm0 | |
| end; | |
| procedure QuickSortInt(count: Integer; var List: array of TSortItem); | |
| procedure QuickSort(const L, R: Integer; List: TPSortItem); //L:eax R:edx List:ecx; recursive in-place sort of 8-byte items by the signed dword at offset 0, pivot = List[R] | |
| asm { eax, edx and ecx are left unchanged on return, which the recursive calls below rely on } | |
| push ebx | |
| push esi | |
| push edi | |
| mov ebx, eax //Lpos := L | |
| mov esi, edx //Rpos := R | |
| dec ebx | |
| mov edi, [ecx + edx * 8] //ListR := List[R].iZ; | |
| @@1: | |
| inc ebx | |
| cmp edi, [ecx + ebx * 8] | |
| jg @@1 { advance Lpos while the key is below the pivot; stops at R at the latest } | |
| @@2: | |
| dec esi | |
| cmp esi, ebx | |
| jle @@4 //break | |
| cmp edi, [ecx + esi * 8] | |
| jl @@2 { move Rpos down while the key is above the pivot } | |
| push eax | |
| push edx | |
| mov eax, [ecx + ebx * 8] { swap both dwords of items Lpos and Rpos } | |
| mov edx, [ecx + esi * 8] | |
| mov [ecx + esi * 8], eax | |
| mov [ecx + ebx * 8], edx | |
| mov eax, [ecx + ebx * 8 + 4] | |
| mov edx, [ecx + esi * 8 + 4] | |
| mov [ecx + esi * 8 + 4], eax | |
| mov [ecx + ebx * 8 + 4], edx | |
| pop edx | |
| pop eax | |
| jmp @@1 | |
| @@4: | |
| mov esi, [ecx + ebx * 8] { move the pivot item into its final slot Lpos } | |
| mov [ecx + edx * 8], esi | |
| mov [ecx + ebx * 8], edi | |
| mov esi, [ecx + ebx * 8 + 4] | |
| mov edi, [ecx + edx * 8 + 4] | |
| mov [ecx + edx * 8 + 4], esi | |
| mov [ecx + ebx * 8 + 4], edi | |
| dec ebx | |
| cmp ebx, eax | |
| jle @@5 | |
| mov esi, edx { sort L .. Lpos-1; R is parked in esi across the call } | |
| mov edx, ebx | |
| call QuickSort | |
| mov edx, esi | |
| @@5: | |
| add ebx, 2 | |
| cmp ebx, edx | |
| jge @@6 | |
| mov esi, eax { sort Lpos+1 .. R; L is parked in esi across the call } | |
| mov eax, ebx | |
| call QuickSort | |
| mov eax, esi | |
| @@6: | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; | |
| function RMcalcVLight(StepCount: Single): Integer; { Rounds StepCount to an integer, caps it at 16383 and packs it as a 7-bit mantissa in bits 0..6 with a shift count above it; values whose highest set bit is at position 6 or lower are returned unchanged. } | |
| asm { StepCount is read from the stack at [ebp + 8]; result in eax; ecx is preserved } | |
| push ecx | |
| fld dword [ebp + 8] | |
| fistp dword [esp] { reuses the slot of the pushed ecx as scratch } | |
| mov eax, [esp] | |
| cmp eax, 16383 | |
| jle @1 | |
| mov eax, 16383 | |
| @1: bsr ecx, eax | |
| jz @2 { bsr sets ZF when eax is zero } | |
| sub ecx, 6 | |
| jle @2 | |
| shr eax, cl | |
| shl ecx, 7 | |
| or eax, ecx | |
| @2: pop ecx | |
| end; | |
| procedure RMCalcRoughness(N: TPVec3D; var sRough: Single; dt2, dsG: PDouble); { sRough := clamp(sqrt(max(0, (sqr(dt2^) * dsG^ * 7 + eps) / (dot(N, N) + eps))) - 0.05, 0, 1); eps is d1em40 in the SSE2 branch and d1em100 in the FPU branch. } | |
| asm { eax = N, edx = @sRough, ecx = dt2; dsG is read from [ebp + 8] } | |
| cmp SupportSSE2, 0 | |
| jz @@1 | |
| movupd xmm0, [eax] | |
| movsd xmm1, [eax + 16] | |
| movsd xmm2, [ecx] | |
| mulpd xmm0, xmm0 | |
| mulsd xmm1, xmm1 | |
| mulsd xmm2, xmm2 | |
| addsd xmm1, xmm0 | |
| mov eax, [ebp + 8] | |
| unpckhpd xmm0, xmm0 | |
| mulsd xmm2, [eax] | |
| addsd xmm1, xmm0 | |
| mulsd xmm2, d7 | |
| addsd xmm1, d1em40 | |
| addsd xmm2, d1em40 | |
| xorpd xmm3, xmm3 | |
| divsd xmm2, xmm1 | |
| maxsd xmm2, xmm3 | |
| sqrtsd xmm2, xmm2 | |
| subsd xmm2, d005 | |
| maxsd xmm2, xmm3 | |
| minsd xmm2, d1p0 | |
| cvtsd2ss xmm4, xmm2 | |
| movss [edx], xmm4 | |
| jmp @end | |
| @@1: | |
| fld qword [eax] | |
| fmul st, st | |
| fld qword [eax + 8] | |
| fmul st, st | |
| faddp | |
| fld qword [eax + 16] | |
| fmul st, st | |
| faddp | |
| fadd d1em100 | |
| mov eax, [ebp + 8] | |
| fld qword [ecx] | |
| fmul st, st | |
| fmul qword [eax] | |
| fmul s7 | |
| fadd d1em100 | |
| fdivrp { numerator / denominator } | |
| ftst | |
| fnstsw ax | |
| shr ah, 1 { carry = C0, set when the quotient is negative } | |
| jnc @1 | |
| fstp st | |
| fldz | |
| @1: fsqrt | |
| fld s005 //0.05, sR' | |
| fcom st(1) | |
| fnstsw ax | |
| shr ah, 1 | |
| jc @up { taken when 0.05 < sR' } | |
| fcompp { used only to pop both values } | |
| xor eax, eax | |
| mov [edx], eax { stores 0.0 as a bit pattern } | |
| jmp @end | |
| @up: | |
| fsubp | |
| fld1 | |
| fcomp st(1) | |
| fnstsw ax | |
| and ah, 41H { C3 and C0 both clear means 1 > value } | |
| jz @up2 | |
| fstp st | |
| fld1 | |
| @up2: | |
| fstp dword [edx] | |
| @end: | |
| end; | |
| procedure RMCalculateStartPos(pMCTparas: PMCTparameter; ix, iy: Integer); { Writes C1..C3 of the TIteration3Dext record at pIt3Dext: a plain copy of Ystart when MCTCameraOptic = 2, otherwise Ystart + ix * (first three doubles of Vgrads) + iy * (next three doubles of Vgrads). } | |
| asm { eax = pMCTparas, edx = ix, ecx = iy } | |
| add eax, $78 { bias the base pointer; every field offset below subtracts $78 again } | |
| cmp dword [eax + TMCTparameter.MCTCameraOptic - $78], 2 | |
| jne @@2 | |
| mov ecx, dword [eax + TMCTparameter.pIt3Dext - $78] | |
| fld qword [eax + TMCTparameter.Ystart - $78] | |
| fld qword [eax + TMCTparameter.Ystart - $78 + 8] | |
| fld qword [eax + TMCTparameter.Ystart - $78 + 16] | |
| fstp qword [ecx + TIteration3Dext.C3] | |
| fstp qword [ecx + TIteration3Dext.C2] | |
| fstp qword [ecx + TIteration3Dext.C1] | |
| ret | |
| @@2: | |
| cmp SupportSSE2, 0 | |
| jz @@1 | |
| push ecx | |
| push edx | |
| cvtpi2pd xmm7, [esp] //xx,yy (low lane = ix, high lane = iy, read from the two pushes) | |
| mov ecx, dword [eax + TMCTparameter.pIt3Dext - $78] //+68 | |
| lea edx, eax + $78 //TMCTparameter.Ystart | |
| movapd xmm6, xmm7 | |
| unpckhpd xmm7, xmm7 //yy,yy | |
| unpcklpd xmm6, xmm6 //xx,xx | |
| movupd xmm0, [eax + TMCTparameter.Vgrads - $78] | |
| movupd xmm2, [eax + TMCTparameter.Vgrads - $60] | |
| movupd xmm4, [edx + TMCTparameter.Ystart - $78 - $78] | |
| mulpd xmm0, xmm6 | |
| mulsd xmm6, [eax + TMCTparameter.Vgrads - $68] | |
| mulpd xmm2, xmm7 | |
| mulsd xmm7, [eax + TMCTparameter.Vgrads - $50] | |
| addpd xmm0, xmm2 | |
| addsd xmm6, xmm7 | |
| addpd xmm0, xmm4 | |
| addsd xmm6, [edx + TMCTparameter.Ystart - $78 - $68] | |
| movupd [ecx + TIteration3Dext.C1], xmm0 { one 16-byte store starting at C1 } | |
| movsd [ecx + TIteration3Dext.C3], xmm6 | |
| pop edx | |
| pop ecx | |
| ret | |
| @@1: | |
| push ecx | |
| fild dword [esp] | |
| push edx | |
| fild dword [esp] //xx,yy | |
| mov ecx, dword [eax + TMCTparameter.pIt3Dext - $78] //+68 | |
| lea edx, eax + $78 //TMCTparameter.Ystart | |
| fld qword [eax + TMCTparameter.Vgrads - $78] | |
| fmul st, st(1) | |
| fld qword [eax + TMCTparameter.Vgrads - $78 + 24] | |
| fmul st, st(3) | |
| faddp | |
| fadd qword [edx + TMCTparameter.Ystart - $78 - $78] | |
| fstp qword [ecx + TIteration3Dext.C1] | |
| fld qword [eax + TMCTparameter.Vgrads - $78 + 8] | |
| fmul st, st(1) | |
| fld qword [eax + TMCTparameter.Vgrads - $78 + 32] | |
| fmul st, st(3) | |
| faddp | |
| fadd qword [edx + TMCTparameter.Ystart - $70 - $78] | |
| fstp qword [ecx + TIteration3Dext.C2] //xx,yy | |
| fmul qword [eax + TMCTparameter.Vgrads - $78 + 16] | |
| fxch | |
| fmul qword [eax + TMCTparameter.Vgrads - $78 + 40] | |
| faddp | |
| fadd qword [edx + TMCTparameter.Ystart - $68 - $78] | |
| fstp qword [ecx + TIteration3Dext.C3] | |
| pop edx | |
| pop ecx | |
| end; | |
| procedure RMCalculateVgradsFOV(pMCTparas: PMCTparameter; ix: Integer); { Stores CAFX := (FOVXoff - ix) * FOVXmul, fills mVgradsFOV according to MCTCameraOptic (0, 1 or other), then calls RotateVectorReverse with eax = @mVgradsFOV and edx = @VGrads. } | |
| asm { eax = pMCTparas, edx = ix } | |
| push ebx | |
| push esi | |
| push edx //to store ix in [esp] and fiload (esp := esp-4) | |
| lea ebx, eax + $1a0 { biased base pointer; field offsets below subtract $1a0 again } | |
| fild dword [esp] //ix | |
| fsubr dword [ebx + TMCTparameter.FOVXoff - $1a0] | |
| fmul dword [ebx + TMCTparameter.FOVXmul - $1a0] | |
| fst qword [ebx + TMCTparameter.CAFX - $1a0] // $1a0 | |
| cmp dword [ebx + TMCTparameter.MCTCameraOptic - $1a0], 1 // $1fc | |
| je @@3 | |
| fstp st | |
| lea ecx, [ebx + TMCTparameter.mVgradsFOV - $1a0] | |
| lea edx, [ebx + TMCTparameter.CAFX - $1a0] // $1a0 | |
| lea eax, [ebx + TMCTparameter.CAFY - $1a0] // $1a8 | |
| cmp dword [ebx + TMCTparameter.MCTCameraOptic - $1a0], 0 // $1fc | |
| je @@1 | |
| call BuildViewVectorDSphereFOV | |
| jmp @@2 | |
| @@3: | |
| fchs { optic 1: mVgradsFOV := (-CAFX, CAFY, mctPlOpticZ), then NormaliseVectorVar is called on it } | |
| fstp qword [ebx + TMCTparameter.mVgradsFOV - $1a0] | |
| fld qword [ebx + TMCTparameter.CAFY - $1a0] // $1a8 | |
| fstp qword [ebx + TMCTparameter.mVgradsFOV - $1a0 + 8] | |
| fld dword [ebx + TMCTparameter.mctPlOpticZ - $1a0] // $204 | |
| fstp qword [ebx + TMCTparameter.mVgradsFOV - $1a0 + 16] | |
| lea eax, [ebx + TMCTparameter.mVgradsFOV - $1a0] | |
| call NormaliseVectorVar | |
| jmp @@2 | |
| @@1: | |
| call BuildViewVectorDFOV | |
| @@2: | |
| lea edx, [ebx + TMCTparameter.VGrads - $1a0] // $80 | |
| lea eax, [ebx + TMCTparameter.mVgradsFOV - $1a0] | |
| call RotateVectorReverse | |
| pop edx //to Inc(esp, 4) | |
| pop esi | |
| pop ebx | |
| end; | |
| procedure RMdoColor(pMCTparas: PMCTparameter); { Computes a single value selected by ColorOption (1..5, anything else uses OTrap * 4096) and passes it to MinMaxClip15bit with edx = @TsiLight5.Otrap; option 5 additionally produces a second value for @TsiLight5.SIgradient. } | |
| const | |
| cd5200: Single = 5200; | |
| cd4096: Single = 4096; | |
| cd5215: Single = 5215; | |
| asm { eax = pMCTparas } | |
| push ebx | |
| push edi | |
| push edx //just to get dword [esp] | |
| mov edi, [eax + TMCTparameter.mPsiLight] | |
| mov ebx, [eax + TMCTparameter.pIt3Dext] | |
| movzx edx, byte [eax + TMCTparameter.ColorOption] //coloroption | |
| cmp edx, 6 | |
| jnb @@COelse | |
| jmp dword [edx * 4 + @@jmptable] | |
| @@jmptable: | |
| dd @@COelse, @@CO1, @@CO2, @@CO3, @@CO4, @@CO5 | |
| @@CO1: | |
| fld qword [ebx + 8] //Rold | |
| fld1 | |
| faddp | |
| fdivr qword [ebx + $70] //Rout | |
| fldln2 | |
| fxch | |
| fyl2x { natural logarithm of Rout / (Rold + 1) } | |
| fmul dword [eax + TMCTparameter.mctColorMul] //mctColorMul | |
| jmp @@up | |
| nop | |
| @@CO2: | |
| fld qword [ebx+$20] | |
| fsub qword [ebx+$40] | |
| jmp @1 | |
| @@CO3: | |
| fld qword [ebx+$28] | |
| fsub qword [ebx+$48] | |
| @1: fld qword [ebx+$18] | |
| fsub qword [ebx+$38] | |
| @2: fpatan | |
| fldpi | |
| faddp | |
| fmul cd5200 { (angle + pi) * 5200 } | |
| jmp @@up | |
| @@CO4: | |
| fld qword [ebx+$28] | |
| fsub qword [ebx+$48] | |
| fld qword [ebx+$20] | |
| fsub qword [ebx+$40] | |
| jmp @2 | |
| @@CO5: | |
| fld qword [ebx+$20] | |
| fld st | |
| fmul st, st //yy,y | |
| fld qword [ebx+$18] //x,yy,y | |
| fld st | |
| fmul st, st //xx,x,yy,y | |
| fxch st(3) //y,x,yy,xx | |
| fpatan | |
| fldpi | |
| faddp | |
| fmul cd5215 //s,yy,xx | |
| fxch st(2) //xx,yy,s | |
| faddp | |
| fadd d1em100 | |
| fld qword [ebx+$28] //z,yy+xx,s norm vec[2] for arcsin | |
| fld st | |
| fmul st, st //zz,z,yy+xx,s | |
| faddp st(2), st //z,rr,s | |
| fxch //rr,z,s | |
| fsqrt //r,z,s | |
| fdivp //z/r,s | |
| @@s2: | |
| fld1 //arcsin(x) = arctan2(x, sqrt(1-x*x)) | |
| fld st(1) | |
| fmul st(0), st(0) | |
| fsubp | |
| fsqrt | |
| fpatan | |
| fadd st, st | |
| fldpi | |
| faddp | |
| fmul cd5215 | |
| fstp dword [esp] | |
| lea edx, [edi + TsiLight5.SIgradient] | |
| mov eax, esp | |
| call MinMaxClip15bit { s is still on the FPU stack across this call and is stored at @@up; NOTE(review): requires the callee to leave the FPU stack balanced } | |
| jmp @@up | |
| @@COelse: | |
| fld qword [ebx + TIteration3Dext.OTrap] | |
| fmul cd4096 | |
| @@up: | |
| fstp dword [esp] | |
| lea edx, [edi + TsiLight5.Otrap] | |
| mov eax, esp | |
| call MinMaxClip15bit | |
| pop edx | |
| pop edi | |
| pop ebx | |
| end; | |
| const CS8388352: Single = 8388352; { NOTE(review): the procedure header for this routine is missing from the listing; the body reads eax as a PMCTparameter and edx as the cut-plane number. } | |
| asm { Cut plane > 0: stores a depth value at [PSL + 6] and builds a normal through MakeWNormalsFromDVec; otherwise writes fixed constants to PSL. } | |
| push ebx | |
| push esi | |
| push edi | |
| add esp, -24 { room for N: three doubles } | |
| mov edi, [eax+TMCTparameter.mPsiLight] //PSL | |
| mov esi, edx //cutplane | |
| lea ebx, eax + 128 //MCTparas | |
| test esi, esi //if cutplane>0 | |
| jle @@1 | |
| fld1 | |
| fld qword [ebx+TMCTparameter.mZZ-128] //+104 mZZ^,1 NN := 8388352 - ZcMul * (Sqrt(mZZ * Zcorr + 1) - 1); | |
| fmul qword [ebx+TMCTparameter.Zcorr-128] //$274 | |
| fadd st, st(1) | |
| fsqrt | |
| fsubrp | |
| fmul qword [ebx+TMCTparameter.ZcMul-128] //$26c | |
| fsubr CS8388352 //NN | |
| fistp dword [esp] | |
| mov eax, [esp] | |
| test eax, eax | |
| jns @@3 | |
| xor eax, eax { negative results are clamped to 0 } | |
| @@3: | |
| shl eax, 8 // PCardinal(@PSL.RoughZposFine)^ := iTmp shl 8; | |
| mov [edi+6], eax | |
| dec esi //VGrads: +128 | |
| fld qword [ebx+esi*8+TMCTparameter.VGrads+$30-128] // if Abs(Vgrads[2, CutPlane]) < 1e-40 | |
| fabs | |
| fcomp d1em40 | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@4 | |
| fld dm1e40 // NN := -1e40 | |
| jmp @@5 | |
| @@4: | |
| fld1 // NN := -1 / Vgrads[2, CutPlane]; | |
| fchs | |
| fdiv qword [ebx+esi*8+TMCTparameter.VGrads+$30-128] | |
| @@5: | |
| fld qword [ebx+esi*8+TMCTparameter.VGrads-128] // N[0] := Vgrads[0, CutPlane] * NN; | |
| fmul st, st(1) | |
| fstp qword [esp] | |
| fld qword [ebx+esi*8+TMCTparameter.VGrads+$18-128] // N[1] := Vgrads[1, CutPlane] * NN; | |
| fmulp | |
| fstp qword [esp+8] | |
| fld1 | |
| fchs // N[2] := -1; | |
| fstp qword [esp+16] | |
| mov edx, esp // MakeWNormalsFromDVec(TPLNormals(PSL), @N); | |
| mov eax, edi | |
| call MakeWNormalsFromDVec | |
| jmp @@6 | |
| @@1: | |
| xor eax, eax | |
| mov [edi+6], $7fff0000 { no operand size is written on this store; NOTE(review): confirm the assembler emits a 32-bit store } | |
| mov dword [edi], eax | |
| mov word [edi+4], $8001 | |
| @@6: | |
| add esp, 24 | |
| pop edi | |
| pop esi | |
| pop ebx //} | |
| end; | |
| procedure RMdoBinSearch(pMCTparas: PMCTparameter; var DE, RLastStepWidth{, RLastDE}: Double); { Refines the position along mVgradsFOV: starts with a step of RLastStepWidth * sm05, then repeatedly moves, re-evaluates CalcDE and scales the step by s055 (sign chosen by DE versus msDEstop) until abs(DE - msDEstop) < s0001 or iDEAddSteps is used up. } | |
| asm { eax = pMCTparas, edx = @DE, ecx = @RLastStepWidth } | |
| push ebx | |
| push esi | |
| push edi | |
| push ebp { ebp serves as the remaining-steps counter below, so no ebp-relative access is possible in this routine } | |
| add esp, -8 { [esp] holds the current step as a double } | |
| mov edi, edx //@dTmp | |
| lea esi, eax+$38 //@MCTParas (was:qTMandCalcThread) | |
| mov ebx, [esi+TMCTparameter.pIt3Dext-$38] | |
| mov ebp, [esi+TMCTparameter.iDEAddSteps-$38] //+$40 | |
| fld qword [ecx] // RLastStepWidth | |
| fmul sm05 | |
| jmp @@2 | |
| @@4: | |
| fld qword [esi+TMCTparameter.mZZ-$38] //+$68 | |
| fadd qword [esp] | |
| fstp qword [esi+TMCTparameter.mZZ-$38] //+$68 | |
| fld qword [esi+TMCTparameter.mVgradsFOV-$38] | |
| fld qword [esi+TMCTparameter.mVgradsFOV-$38 + 8] | |
| fld qword [esi+TMCTparameter.mVgradsFOV-$38 + 16] | |
| fld qword [esp] | |
| fmul st(3), st | |
| fmul st(2), st | |
| fmulp | |
| fadd qword [ebx+TIteration3Dext.C1 + 16] { C1, C1+8 and C1+16 each advance by step * mVgradsFOV } | |
| fstp qword [ebx+TIteration3Dext.C1 + 16] | |
| fadd qword [ebx+TIteration3Dext.C1 + 8] | |
| fstp qword [ebx+TIteration3Dext.C1 + 8] | |
| fadd qword [ebx+TIteration3Dext.C1] | |
| fstp qword [ebx+TIteration3Dext.C1] | |
| fld qword [esi+TMCTparameter.mZZ-$38] //+$68 | |
| fmul dword [esi+TMCTparameter.mctDEstopFactor-$38] //+$54 | |
| fld1 | |
| faddp | |
| fmul dword [esi+TMCTparameter.DEstop-$38] //+$60 | |
| fstp dword [esi+TMCTparameter.msDEstop-$38] //+$38 msDEstop := DEstop * (1 + mZZ * mctDEstopFactor); | |
| dec ebp | |
| test ebp, ebp | |
| jle @@3 | |
| lea edx, esi-$38 | |
| mov eax, ebx | |
| call esi+TMCTparameter.CalcDE-$38 { call through the CalcDE field with eax = pIt3Dext and edx = pMCTparas; the result is taken from st(0) } | |
| fstp qword [edi] //dTmp | |
| fld qword [edi] | |
| fcomp dword [esi+TMCTparameter.msDEstop-$38] //+$38 | |
| fnstsw ax | |
| fld qword [esp] | |
| fabs | |
| fmul s055 | |
| shr ah, 1 { carry set when DE < msDEstop: the next step is negated } | |
| jnc @@8 | |
| fchs | |
| @@8: | |
| @@2: | |
| fstp qword [esp] | |
| fld qword [edi] | |
| fsub dword [esi+TMCTparameter.msDEstop-$38] //+$38 | |
| fabs | |
| fcomp s0001 | |
| fnstsw ax | |
| shr ah, 1 | |
| jnc @@4 | |
| @@3: | |
| add esp, 8 | |
| pop ebp | |
| pop edi | |
| pop esi | |
| pop ebx | |
| end; //} | |
| procedure CalcZposAndRough(siLight: TPsiLight5; mct: PMCTparameter; const ZZ: Double); { Stores at siLight + 6 the dword (clamp(8388352 - Round((Sqrt(Max(ZZ, 0) * Zcorr + 1) - 1) * ZcMul), 0, 8388352) shl 8), OR-ed with Round(sRoughness * s255) when iSmNormals > 0. } | |
| asm { eax = siLight, edx = mct; ZZ is read from the stack at [ebp + 8] } | |
| push ebx | |
| sub esp, 4 { scratch dword for the fistp conversions } | |
| fld1 | |
| test byte [ebp + 15], 128 //negative zz clip (top byte of ZZ holds the sign bit) | |
| jns @1 | |
| fldz | |
| jmp @2 | |
| @1: fld qword [ebp + 8] | |
| @2: fmul qword [edx + TMCTparameter.Zcorr] | |
| fadd st(0), st(1) | |
| fsqrt //at fsqrt? ZZ * Zcorr > 1?? zz=-642!! | |
| fsubrp //invalid fp operation in critical ipol hybrid | |
| fmul qword [edx + TMCTparameter.ZcMul] | |
| fistp dword [esp] | |
| mov ebx, 8388352 | |
| sub ebx, dword [esp] | |
| test ebx, ebx | |
| jnl @up1 | |
| xor ebx, ebx { clamp below at 0 } | |
| @up1: | |
| cmp ebx, 8388352 | |
| jle @up2 | |
| mov ebx, 8388352 { clamp above at 8388352 } | |
| @up2: | |
| shl ebx, 8 | |
| cmp byte [edx + TMCTparameter.iSmNormals], 0 | |
| jle @up3 | |
| fld dword [edx + TMCTparameter.sRoughness] | |
| fmul s255 | |
| fistp dword [esp] | |
| or ebx, [esp] { the whole converted dword is OR-ed in, not just its low byte } | |
| @up3: | |
| mov [eax + 6], ebx | |
| add esp, 4 | |
| pop ebx | |
| end; | |
| procedure FirstATlevelHiQ(PIA: TPCardinalArray; PsiLight: TPsiLight5; Leng: Integer); { For each of Leng records spaced 18 bytes apart, writes one dword to PIA: 0 when bit 15 of the word at record + 8 is set, otherwise (dword at record + 6 with its low byte cleared) shr 1. } | |
| asm { eax = PIA, edx = PsiLight, ecx = Leng; esi is preserved } | |
| push esi | |
| dec ecx | |
| js @@out { leave when Leng - 1 is negative } | |
| inc ecx | |
| add edx, 8 | |
| @@1: | |
| xor esi, esi { default output is 0 } | |
| test word [edx], $8000 | |
| jnz @@3 | |
| mov esi, [edx-2] | |
| and esi, $ffffff00 | |
| shr esi, 1 | |
| @@3: | |
| mov [eax], esi | |
| add edx, 18 | |
| add eax, 4 | |
| dec ecx | |
| jnz @@1 | |
| @@out: | |
| pop esi | |
| end; | |
| procedure SmoothH(PIA, SA: TPCardinalArray; ya, Step: Integer); { For i = 0..ya: PIA[i] := (((SA[min(i+Step, ya)] + SA[max(i-Step, 0)]) shr 1) + PIA[i]) shr 1, with PIA advancing 4 bytes per element. } | |
| asm { eax = PIA, edx = SA, ecx = ya; Step is read from [ebp + 8] } | |
| add esp, -12 { locals: [ebp-8] = ya, [ebp-12] = remaining iterations } | |
| push ebx | |
| push esi | |
| push edi | |
| mov [ebp-8], ecx | |
| mov ebx, edx | |
| mov edi, [ebp+8] | |
| mov edx, ecx | |
| test edx, edx | |
| jl @@2 { nothing to do when ya is negative } | |
| inc edx | |
| mov [ebp-12], edx | |
| xor esi, esi { esi = i } | |
| @@1: | |
| mov edx, esi | |
| sub edx, edi | |
| test edx, edx | |
| jnl @@3 | |
| xor edx, edx { lower index clamped to 0 } | |
| @@3: | |
| mov ecx, edi | |
| add ecx, esi | |
| cmp ecx, [ebp-8] | |
| jle @@4 | |
| mov ecx, [ebp-8] { upper index clamped to ya } | |
| @@4: | |
| mov ecx, [ebx+ecx*4] | |
| add ecx, [ebx+edx*4] | |
| shr ecx, 1 | |
| add ecx, [eax] | |
| shr ecx, 1 | |
| mov [eax], ecx | |
| inc esi | |
| add eax, 4 | |
| dec dword [ebp-12] | |
| jnz @@1 | |
| @@2: | |
| pop edi | |
| pop esi | |
| pop ebx | |
| add esp, 12 | |
| end; | |
| procedure SmoothV(PIA, SA: TPCardinalArray; ye, Step, wid: Integer); { Same averaging as SmoothH for i = 0..ye, except that the PIA pointer advances by wid bytes per element while SA is still indexed in 4-byte steps. } | |
| asm { eax = PIA, edx = SA, ecx = ye; Step is read from [ebp + 12] and wid from [ebp + 8] } | |
| add esp, -12 { locals: [ebp-8] = ye, [ebp-12] = remaining iterations } | |
| push ebx | |
| push esi | |
| push edi | |
| mov [ebp-8], ecx | |
| mov ebx, edx | |
| mov edi, [ebp+12] | |
| mov edx, ecx | |
| test edx, edx | |
| jl @@2 { nothing to do when ye is negative } | |
| inc edx | |
| mov [ebp-12], edx | |
| xor esi, esi { esi = i } | |
| @@1: | |
| mov edx, esi | |
| sub edx, edi | |
| test edx, edx | |
| jnl @@3 | |
| xor edx, edx { lower index clamped to 0 } | |
| @@3: | |
| mov ecx, edi | |
| add ecx, esi | |
| cmp ecx, [ebp-8] | |
| jle @@4 | |
| mov ecx, [ebp-8] { upper index clamped to ye } | |
| @@4: | |
| mov ecx, [ebx+ecx*4] | |
| add ecx, [ebx+edx*4] | |
| shr ecx, 1 | |
| add ecx, [eax] | |
| shr ecx, 1 | |
| mov [eax], ecx | |
| inc esi | |
| add eax, dword [ebp+8] { destination stride is wid, taken as a byte count } | |
| dec dword [ebp-12] | |
| jnz @@1 | |
| @@2: | |
| pop edi | |
| pop esi | |
| pop ebx | |
| add esp, 12 | |
| end; | |
| procedure MinSI(var SI: SmallInt; var i: Integer); { Leaves SI unchanged when SI >= i; otherwise sets SI to i, limited to at most $7FFF. } | |
| asm { eax = @SI, edx = @i } | |
| movsx ecx, word [eax] | |
| cmp ecx, [edx] | |
| jnl @@1 | |
| cmp dword [edx], $7FFF | |
| jl @@2 | |
| mov word [eax], $7FFF | |
| ret | |
| @@2: | |
| mov edx, [edx] { edx stops being a pointer here; only this path overwrites it } | |
| mov word [eax], dx | |
| @@1: | |
| end; | |
| function NotOnlyBackGround4(p: Pointer): Integer; { Returns $80000000 when bit 31 is set in all four dwords at p, p + 18, p + 36 and p + 54, and 0 otherwise. } | |
| asm { eax = p; the result is returned in eax with a copy left in edx } | |
| mov edx, [eax + 54] | |
| and edx, [eax + 36] | |
| and edx, [eax + 18] | |
| and edx, [eax] | |
| and edx, $80000000 { keep only the common top bit } | |
| mov eax, edx | |
| end; | |
| procedure MakeZP4(p: Pointer; var zp: array of Integer); { Writes four values to zp[0..3]: the dwords at p, p + 18, p + 36 and p + 54, each with its low byte cleared and then shifted right by one. } | |
| asm { eax = p, edx = address of zp[0]; ecx is used as scratch } | |
| mov ecx, [eax] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx], ecx | |
| mov ecx, [eax + 18] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 4], ecx | |
| mov ecx, [eax + 36] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 8], ecx | |
| mov ecx, [eax + 54] | |
| and ecx, $FFFFFF00 | |
| shr ecx, 1 | |
| mov [edx + 12], ecx | |
| end; | |
| procedure TAmbHiQCalcR.Execute; | |
| // ... (the surrounding Pascal code of this method is elided in this listing; only the inner asm block is shown) | |
| asm //~13s with 3 steps; for 32 directions (iDir = 31 down to 0) walks up to StepCount jittered samples and keeps the largest slope value per direction in psm[iDir] | |
| push eax | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| mov esi, PATL | |
| mov edx, psm | |
| mov edi, seed | |
| mov iDir, 31 | |
| xorps xmm2, xmm2 | |
| xorps xmm3, xmm3 | |
| xorps xmm4, xmm4 | |
| movss xmm5, sAbs { used below as an AND mask to form the absolute value; presumably $7FFFFFFF - TODO confirm } | |
| xorps xmm6, xmm6 | |
| xorps xmm7, xmm7 | |
| movlps xmm4, ssub //xmm4 = ssub | |
| movlps xmm7, sstep //xmm7 = sstep | |
| @foriDir: mov eax, PS | |
| movlps xmm6, [eax] //xmm6 = PS[0,1] | |
| movlps xmm2, sMinRad | |
| movaps xmm1, xmm6 | |
| mulps xmm1, xmm2 | |
| subps xmm1, xmm4 //sxy-ssub | |
| mov eax, StepCount | |
| mov sc, eax { NOTE(review): the loop decrements before testing, so StepCount = 0 would not exit on the first pass } | |
| @while: imul edi, $000343FD | |
| add edi, $269EC3 { seed := seed * $343FD + $269EC3 } | |
| mov eax, edi | |
| movaps xmm0, xmm1 | |
| shr eax, 10 | |
| CVTSS2SI ecx, xmm0 //x2 | |
| mov ebx, eax | |
| and ebx, iand | |
| add ecx, ebx { x2 plus masked random bits } | |
| shufps xmm0, xmm0, 1 | |
| shr eax, 6 | |
| CVTSS2SI ebx, xmm0 //y2 | |
| and eax, iand | |
| add ebx, eax { y2 plus different masked random bits } | |
| push ecx | |
| mov eax, ebx | |
| imul ecx, ecx | |
| imul eax, eax | |
| add eax, ecx { eax = x2*x2 + y2*y2 } | |
| pop ecx | |
| test eax, eax | |
| jz @skip { zero offset: no sample } | |
| CVTSI2SS xmm2, eax | |
| add ecx, dword [xy] | |
| add ebx, dword [xy + 4] | |
| test ecx, ecx //reflection at borders | |
| jns @@1 | |
| neg ecx | |
| cmp ecx, WLo | |
| jge @endwhile | |
| jmp @@2 | |
| @@1: cmp ecx, MWidth | |
| jl @@2 | |
| sub ecx, MW2 | |
| neg ecx | |
| cmp ecx, WHi | |
| jl @endwhile | |
| @@2: test ebx, ebx | |
| jns @@3 | |
| neg ebx | |
| cmp ebx, HLo | |
| jge @endwhile | |
| jmp @con | |
| @@3: cmp ebx, MHeight | |
| jl @con | |
| sub ebx, MH2 | |
| neg ebx | |
| cmp ebx, HHi | |
| jl @endwhile | |
| @con: imul ebx, MWidth | |
| add ebx, ecx | |
| mov eax, [esi + ebx * 4] //PATL^[y2 * MWidth + x2] | |
| sub eax, zp | |
| CVTSI2SS xmm0, eax | |
| RSQRTSS xmm2, xmm2 | |
| mulss xmm0, xmm2 //st (value difference times the approximate reciprocal distance) | |
| movss xmm3, xmm0 | |
| andps xmm0, xmm5 | |
| mulss xmm3, sit | |
| addss xmm0, sZRT | |
| mulss xmm3, sZRT | |
| rcpss xmm0, xmm0 | |
| mulss xmm3, xmm0 | |
| minss xmm3, s32767 | |
| maxss xmm3, sm32768 | |
| CVTSS2SI eax, xmm3 //it := Round(st * sZRT * sit / (sZRT + Abs(st)) ); | |
| mov ecx, iDir | |
| cmp ax, word [edx + ecx * 2] | |
| jle @skip | |
| mov word [edx + ecx * 2], ax { keep the signed 16-bit maximum for this direction } | |
| @skip: movaps xmm3, xmm7 //sstep | |
| mulps xmm3, xmm6 //DirXY | |
| addps xmm1, xmm3 //sx,sy | |
| dec sc | |
| jnz @while | |
| @endwhile: add PS, 8 | |
| dec iDir | |
| jns @foriDir | |
| mov seed, edi | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| procedure TAmbHiQCalcRpano.Execute; { asm core shown below: for each of 32 directions it marches StepCount randomly offset samples through PATL and keeps the largest clamped value per direction in the 16-bit table at psm; x wraps around, y is mirrored } | |
| // ... | |
| asm //~13s with 3 steps | |
| push eax { save every general register used below } | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| mov esi, PATL { esi = base of the dword array read at @con } | |
| mov edx, psm { edx = base of the per-direction 16-bit results } | |
| mov edi, seed { edi = random state, written back to seed at the end } | |
| mov iDir, 31 { direction counter, runs 31 down to 0 } | |
| xorps xmm2, xmm2 | |
| xorps xmm3, xmm3 | |
| xorps xmm4, xmm4 | |
| movss xmm5, sAbs { mask for the andps below that forms Abs(st); presumably clears the sign bit - confirm } | |
| xorps xmm6, xmm6 | |
| xorps xmm7, xmm7 | |
| movlps xmm4, ssub //xmm4 = ssub | |
| movlps xmm7, sstep //xmm7 = sstep | |
| @foriDir: mov eax, PS { PS points at the current direction pair } | |
| movlps xmm6, [eax] //xmm6 = PS[0,1] | |
| movlps xmm2, sMinRad | |
| movaps xmm1, xmm6 | |
| mulps xmm1, xmm2 { start position = direction * sMinRad } | |
| subps xmm1, xmm4 //sxy-ssub | |
| mov eax, StepCount | |
| mov sc, eax { sc = samples left for this direction } | |
| @while: imul edi, $000343FD { random step: seed = seed * $343FD + $269EC3 } | |
| add edi, $269EC3 | |
| mov eax, edi | |
| movaps xmm0, xmm1 | |
| shr eax, 10 { x offset uses seed bits 10 and up } | |
| CVTSS2SI ecx, xmm0 //x2 | |
| mov ebx, eax | |
| and ebx, iand { iand selects how many random bits are kept } | |
| add ecx, ebx | |
| shufps xmm0, xmm0, 1 { bring sy into lane 0 } | |
| shr eax, 6 { y offset uses seed bits 16 and up } | |
| CVTSS2SI ebx, xmm0 //y2 | |
| and eax, iand | |
| add ebx, eax | |
| push ecx | |
| mov eax, ebx | |
| imul ecx, ecx | |
| imul eax, eax | |
| add eax, ecx { eax = x2*x2 + y2*y2 } | |
| pop ecx | |
| test eax, eax | |
| jz @skip { zero offset: nothing to sample, just advance } | |
| CVTSI2SS xmm2, eax | |
| add ecx, dword [xy] { offsets become absolute coordinates } | |
| add ebx, dword [xy + 4] | |
| test ecx, ecx // x wraps around by MWidth in this variant; only y is reflected at the borders | |
| jns @@1 | |
| add ecx, MWidth { x < 0: wrap once } | |
| test ecx, ecx | |
| jns @@2 | |
| jmp @endwhile { still negative: stop this direction } | |
| @@1: cmp ecx, MWidth | |
| jl @@2 | |
| sub ecx, MWidth { x >= MWidth: wrap once } | |
| cmp ecx, MWidth | |
| jnl @endwhile { still too large: stop this direction } | |
| @@2: test ebx, ebx | |
| jns @@3 | |
| neg ebx { y < 0: mirror to -y } | |
| cmp ebx, HLo | |
| jge @endwhile { mirrored y not below HLo: stop this direction } | |
| jmp @con | |
| @@3: cmp ebx, MHeight | |
| jl @con | |
| sub ebx, MH2 | |
| neg ebx { y >= MHeight: mirror to MH2 - y } | |
| cmp ebx, HHi | |
| jl @endwhile { mirrored y below HHi: stop this direction } | |
| @con: imul ebx, MWidth | |
| add ebx, ecx | |
| mov eax, [esi + ebx * 4] //PATL^[y2 * MWidth + x2] | |
| sub eax, zp | |
| CVTSI2SS xmm0, eax | |
| RSQRTSS xmm2, xmm2 { approximate 1 / Sqrt(x2*x2 + y2*y2) } | |
| mulss xmm0, xmm2 //st | |
| movss xmm3, xmm0 | |
| andps xmm0, xmm5 { Abs(st), see the formula comment below } | |
| mulss xmm3, sit | |
| addss xmm0, sZRT | |
| mulss xmm3, sZRT | |
| rcpss xmm0, xmm0 { approximate reciprocal replaces the division } | |
| mulss xmm3, xmm0 | |
| minss xmm3, s32767 { clamp to the bounds held in s32767 and sm32768 before the 16-bit compare } | |
| maxss xmm3, sm32768 | |
| CVTSS2SI eax, xmm3 //it := Round(st * sZRT * sit / (sZRT + Abs(st)) ); | |
| mov ecx, iDir | |
| cmp ax, word [edx + ecx * 2] | |
| jle @skip { signed compare: keep the stored value unless the new one is larger } | |
| mov word [edx + ecx * 2], ax | |
| @skip: movaps xmm3, xmm7 //sstep | |
| mulps xmm3, xmm6 //DirXY | |
| addps xmm1, xmm3 //sx,sy | |
| dec sc | |
| jnz @while | |
| @endwhile: add PS, 8 { next direction pair, 8 bytes each } | |
| dec iDir | |
| jns @foriDir | |
| mov seed, edi | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| procedure TAmbHiQCalcRT0.Execute; { asm core shown below: for each of 32 directions it marches StepCount randomly offset samples through PATL and keeps the largest clamped value per direction in the 16-bit table at PSI; both x and y are mirrored at the borders } | |
| // ... | |
| asm | |
| push eax { save every general register used below } | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| mov esi, PATL { esi = base of the dword array read at @con } | |
| mov edx, PSI { edx = base of the per-direction 16-bit results } | |
| mov edi, seed { edi = random state, written back to seed at the end } | |
| mov iDir, 31 { direction counter, runs 31 down to 0 } | |
| xorps xmm1, xmm1 | |
| xorps xmm2, xmm2 | |
| xorps xmm3, xmm3 | |
| xorps xmm4, xmm4 | |
| xorps xmm5, xmm5 | |
| xorps xmm6, xmm6 | |
| movlps xmm4, ssub //xmm4 = ssub | |
| movlps xmm5, sstep //xmm5 = sstep | |
| @foriDir: mov eax, PS { PS points at the current direction pair } | |
| movlps xmm1, sMinRad // (1.2 at stepw1) | |
| movlps xmm6, [eax] //xmm6 = PS[0,1] | |
| mulps xmm1, xmm6 { start position = direction * sMinRad } | |
| subps xmm1, xmm4 //sxy-ssub (-0,5 at stepw1) | |
| mov eax, StepCount | |
| mov sc, eax { sc = samples left for this direction } | |
| @while: imul edi, $000343FD { random step: seed = seed * $343FD + $269EC3 } | |
| add edi, $269EC3 | |
| mov eax, edi | |
| movaps xmm0, xmm1 //sx, sy | |
| shr eax, 10 { x offset uses seed bits 10 and up } | |
| CVTSS2SI ecx, xmm0 { ecx = x2 from sx } | |
| mov ebx, eax | |
| and ebx, iand { iand selects how many random bits are kept } | |
| add ecx, ebx | |
| shufps xmm0, xmm0, 1 { bring sy into lane 0 } | |
| shr eax, 6 { y offset uses seed bits 16 and up } | |
| CVTSS2SI ebx, xmm0 { ebx = y2 from sy } | |
| and eax, iand | |
| add ebx, eax | |
| push ecx | |
| mov eax, ebx | |
| imul ecx, ecx | |
| imul eax, eax | |
| add eax, ecx { eax = x2*x2 + y2*y2 } | |
| pop ecx | |
| test eax, eax | |
| jz @skip { zero offset: nothing to sample, just advance } | |
| CVTSI2SS xmm2, eax | |
| add ecx, dword [xy] { offsets become absolute coordinates } | |
| add ebx, dword [xy+4] | |
| test ecx, ecx // reflection at borders | |
| jns @@1 | |
| neg ecx { x < 0: mirror to -x } | |
| cmp ecx, WLo | |
| jge @endwhile { mirrored x not below WLo: stop this direction } | |
| jmp @@2 | |
| @@1: cmp ecx, MWidth | |
| jl @@2 | |
| sub ecx, MW2 | |
| neg ecx { x >= MWidth: mirror to MW2 - x } | |
| cmp ecx, WHi | |
| jl @endwhile { mirrored x below WHi: stop this direction } | |
| @@2: test ebx, ebx | |
| jns @@3 | |
| neg ebx { y < 0: mirror to -y } | |
| cmp ebx, HLo | |
| jge @endwhile | |
| jmp @con | |
| @@3: cmp ebx, MHeight | |
| jl @con | |
| sub ebx, MH2 | |
| neg ebx { y >= MHeight: mirror to MH2 - y } | |
| cmp ebx, HHi | |
| jl @endwhile | |
| @con: imul ebx, MWidth | |
| add ebx, ecx | |
| mov eax, [esi + ebx * 4] //PATL^[y2 * MWidth + x2] | |
| sub eax, zp | |
| CVTSI2SS xmm0, eax // (CVTPI2PS=sse, 2 int to single) | |
| RSQRTSS xmm2, xmm2 { approximate 1 / Sqrt(x2*x2 + y2*y2) } | |
| mulss xmm0, xmm2 //st := (PATL^[y2 * MWidth + x2] - zp) / Sqrt(st); | |
| movss xmm3, xmm0 | |
| mulss xmm0, xmm0 { st * st } | |
| mulss xmm3, sit | |
| addss xmm0, sZRT | |
| mulss xmm3, sZRT | |
| rcpss xmm0, xmm0 { approximate reciprocal replaces the division } | |
| mulss xmm3, xmm0 | |
| minss xmm3, s32767 { clamp to the bounds held in s32767 and sm32768 before the 16-bit compare } | |
| maxss xmm3, sm32768 | |
| CVTSS2SI eax, xmm3 //it := Round(st * sit * sZRT / (st * st + sZRT)); | |
| mov ecx, iDir | |
| cmp ax, word [edx + ecx * 2] | |
| jle @skip { signed compare: keep the stored value unless the new one is larger } | |
| mov word [edx + ecx * 2], ax | |
| @skip: movaps xmm3, xmm5 //sstep | |
| mulps xmm3, xmm6 //DirXY | |
| addps xmm1, xmm3 //sx,sy | |
| dec sc | |
| jnz @while | |
| @endwhile: add PS, 8 { next direction pair, 8 bytes each } | |
| dec iDir | |
| jns @foriDir | |
| mov seed, edi | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| procedure TAmbHiQCalcRT0pano.Execute; { asm core shown below: for each of 32 directions it marches StepCount randomly offset samples through PATL and keeps the largest clamped value per direction in the 16-bit table at PSI; x wraps around, y is mirrored } | |
| // ... | |
| asm | |
| push eax { save every general register used below } | |
| push ebx | |
| push ecx | |
| push edx | |
| push esi | |
| push edi | |
| mov esi, PATL { esi = base of the dword array read at @con } | |
| mov edx, PSI { edx = base of the per-direction 16-bit results } | |
| mov edi, seed { edi = random state, written back to seed at the end } | |
| mov iDir, 31 { direction counter, runs 31 down to 0 } | |
| xorps xmm1, xmm1 | |
| xorps xmm2, xmm2 | |
| xorps xmm3, xmm3 | |
| xorps xmm4, xmm4 | |
| xorps xmm5, xmm5 | |
| xorps xmm6, xmm6 | |
| movlps xmm4, ssub //xmm4 = ssub | |
| movlps xmm5, sstep //xmm5 = sstep | |
| @foriDir: mov eax, PS { PS points at the current direction pair } | |
| movlps xmm1, sMinRad // (1.2 at stepw1) | |
| movlps xmm6, [eax] //xmm6 = PS[0,1] | |
| mulps xmm1, xmm6 { start position = direction * sMinRad } | |
| subps xmm1, xmm4 //sxy-ssub (-0,5 at stepw1) | |
| mov eax, StepCount | |
| mov sc, eax { sc = samples left for this direction } | |
| @while: imul edi, $000343FD { random step: seed = seed * $343FD + $269EC3 } | |
| add edi, $269EC3 | |
| mov eax, edi | |
| movaps xmm0, xmm1 //sx, sy | |
| shr eax, 10 { x offset uses seed bits 10 and up } | |
| CVTSS2SI ecx, xmm0 { ecx = x2 from sx } | |
| mov ebx, eax | |
| and ebx, iand { iand selects how many random bits are kept } | |
| add ecx, ebx | |
| shufps xmm0, xmm0, 1 { bring sy into lane 0 } | |
| shr eax, 6 { y offset uses seed bits 16 and up } | |
| CVTSS2SI ebx, xmm0 { ebx = y2 from sy } | |
| and eax, iand | |
| add ebx, eax | |
| push ecx | |
| mov eax, ebx | |
| imul ecx, ecx | |
| imul eax, eax | |
| add eax, ecx { eax = x2*x2 + y2*y2 } | |
| pop ecx | |
| test eax, eax | |
| jz @skip { zero offset: nothing to sample, just advance } | |
| CVTSI2SS xmm2, eax | |
| add ecx, dword [xy] { offsets become absolute coordinates } | |
| add ebx, dword [xy+4] | |
| test ecx, ecx // x wraps around by MWidth in this variant; only y is reflected at the borders | |
| jns @@1 | |
| add ecx, MWidth { x < 0: wrap once } | |
| test ecx, ecx | |
| jns @@2 | |
| jmp @endwhile { still negative: stop this direction } | |
| @@1: cmp ecx, MWidth | |
| jl @@2 | |
| sub ecx, MWidth { x >= MWidth: wrap once } | |
| cmp ecx, MWidth | |
| jnl @endwhile { still too large: stop this direction } | |
| @@2: test ebx, ebx | |
| jns @@3 | |
| neg ebx { y < 0: mirror to -y } | |
| cmp ebx, HLo | |
| jge @endwhile | |
| jmp @con | |
| @@3: cmp ebx, MHeight | |
| jl @con | |
| sub ebx, MH2 | |
| neg ebx { y >= MHeight: mirror to MH2 - y } | |
| cmp ebx, HHi | |
| jl @endwhile | |
| @con: imul ebx, MWidth | |
| add ebx, ecx | |
| mov eax, [esi + ebx * 4] //PATL^[y2 * MWidth + x2] | |
| sub eax, zp | |
| CVTSI2SS xmm0, eax // (CVTPI2PS=sse, 2 int to single) | |
| RSQRTSS xmm2, xmm2 { approximate 1 / Sqrt(x2*x2 + y2*y2) } | |
| mulss xmm0, xmm2 //st := (PATL^[y2 * MWidth + x2] - zp) / Sqrt(st); | |
| movss xmm3, xmm0 | |
| mulss xmm0, xmm0 { st * st } | |
| mulss xmm3, sit | |
| addss xmm0, sZRT | |
| mulss xmm3, sZRT | |
| rcpss xmm0, xmm0 { approximate reciprocal replaces the division } | |
| mulss xmm3, xmm0 | |
| minss xmm3, s32767 { clamp to the bounds held in s32767 and sm32768 before the 16-bit compare } | |
| maxss xmm3, sm32768 | |
| CVTSS2SI eax, xmm3 //it := Round(st * sit * sZRT / (st * st + sZRT)); | |
| mov ecx, iDir | |
| cmp ax, word [edx + ecx * 2] | |
| jle @skip { signed compare: keep the stored value unless the new one is larger } | |
| mov word [edx + ecx * 2], ax | |
| @skip: movaps xmm3, xmm5 //sstep | |
| mulps xmm3, xmm6 //DirXY | |
| addps xmm1, xmm3 //sx,sy | |
| dec sc | |
| jnz @while | |
| @endwhile: add PS, 8 { next direction pair, 8 bytes each } | |
| dec iDir | |
| jns @foriDir | |
| mov seed, edi | |
| pop edi | |
| pop esi | |
| pop edx | |
| pop ecx | |
| pop ebx | |
| pop eax | |
| end | |
| // ... | |
| function VolLightMapPosSSE(vd: TPVec3D): LongBool; { Transforms vd^ - LightPos by RotMatrix, maps x/y to a cell of the first CubeSides table and returns True (-1) when the transformed z compares below the value stored there } | |
| asm | |
| push esi | |
| push edx //to get esp buf | |
| lea esi, VolumeLightMap | |
| fld qword [eax] { eax = vd; the three doubles are narrowed to singles through the stack slot } | |
| fsub qword [esi + TVolumetricLightMap.LightPos] | |
| fstp dword [esp] | |
| fld qword [eax + 8] | |
| movss xmm0, [esp] { xmm0 = dx } | |
| fsub qword [esi + TVolumetricLightMap.LightPos + 8] | |
| fstp dword [esp] | |
| fld qword [eax + 16] | |
| movss xmm1, [esp] { xmm1 = dy } | |
| fsub qword [esi + TVolumetricLightMap.LightPos + 16] | |
| fstp dword [esp] | |
| shufps xmm0, xmm0, 0 { broadcast dx to all lanes } | |
| movss xmm2, [esp] { xmm2 = dz } | |
| shufps xmm1, xmm1, 0 | |
| shufps xmm2, xmm2, 0 | |
| movups xmm4, [esi + TVolumetricLightMap.RotMatrix] { three 16-byte rows of RotMatrix } | |
| movups xmm5, [esi + TVolumetricLightMap.RotMatrix + 16] | |
| movups xmm6, [esi + TVolumetricLightMap.RotMatrix + 32] | |
| mulps xmm4, xmm0 | |
| mulps xmm5, xmm1 | |
| mulps xmm6, xmm2 | |
| addps xmm4, xmm5 | |
| addps xmm4, xmm6 { xmm4 = row0*dx + row1*dy + row2*dz } | |
| xorps xmm2, xmm2 { zero, lower clamp bound } | |
| movhlps xmm5, xmm4 { lane 0 of xmm5 = transformed z, kept unscaled for the compare } | |
| movss xmm1, [esi + TVolumetricLightMap.StretchSide1] | |
| movss xmm3, [esi + TVolumetricLightMap.HSizeS] | |
| movss xmm0, [esi + TVolumetricLightMap.CSizeS] | |
| shufps xmm1, xmm1, 0 | |
| shufps xmm3, xmm3, 0 | |
| shufps xmm0, xmm0, 0 | |
| mulps xmm4, xmm1 | |
| addps xmm4, xmm3 { scaled by StretchSide1, shifted by HSizeS } | |
| maxps xmm4, xmm2 | |
| minps xmm4, xmm0 { clamped to 0..CSizeS } | |
| cvtss2si eax, xmm4 { eax = column } | |
| shufps xmm4, xmm4, 1 | |
| cvtss2si edx, xmm4 { edx = row } | |
| imul edx, dword [esi + TVolumetricLightMap.CubeSize] | |
| mov esi, [esi + TVolumetricLightMap.CubeSides] { pointer stored at the start of CubeSides } | |
| add edx, eax { cell index = row * CubeSize + column } | |
| xor eax, eax { Result := False; must come before comiss because xor changes the flags } | |
| comiss xmm5, [esi + edx * 4] | |
| jnc @e { carry clear: z is not below the stored value } | |
| mov eax, -1 | |
| @e: pop edx | |
| pop esi | |
| end; | |
| function GetVolLightMapVecSSE(vd: TPSVec): Single; { Picks one of six CubeSides tables from the component of vd^ with the largest magnitude and its sign, projects the other two components onto that table and returns the stored single on the FPU stack } | |
| asm | |
| push esi | |
| push ebx | |
| xorps xmm4, xmm4 { zero, used for the sign tests } | |
| lea esi, VolumeLightMap | |
| movups xmm5, [eax] { eax = vd } | |
| movups xmm7, cAbsSVec { mask for the andps below; presumably clears the sign bits - confirm } | |
| movaps xmm0, xmm5 { lane 0 of xmm0 = x } | |
| movaps xmm1, xmm5 | |
| movhlps xmm2, xmm5 { lane 0 of xmm2 = z } | |
| shufps xmm1, xmm1, 1 { lane 0 of xmm1 = y } | |
| andps xmm5, xmm7 { lane 0 of xmm5 = masked x } | |
| movaps xmm6, xmm5 | |
| movhlps xmm7, xmm5 { lane 0 of xmm7 = masked z } | |
| shufps xmm6, xmm6, 1 { lane 0 of xmm6 = masked y } | |
| movss xmm3, [esi + TVolumetricLightMap.SizeFactor] | |
| ucomiss xmm5, xmm6 | |
| jc @1 { masked x below masked y } | |
| ucomiss xmm5, xmm7 | |
| jc @2 { masked x below masked z } | |
| xor edx, edx { x is largest: table 0 or 1; xor must precede ucomiss because it changes the flags } | |
| ucomiss xmm0, xmm4 | |
| adc edx, 0 { add 1 when the carry flag is set, i.e. x compares below zero } | |
| @3: divss xmm3, xmm0 { scale = SizeFactor / x } | |
| mulss xmm1, xmm3 | |
| mulss xmm2, xmm3 | |
| cvtss2si eax, xmm1 { column from y } | |
| cvtss2si ebx, xmm2 { row from z } | |
| jmp @e | |
| @2: mov edx, 4 { z is largest: table 4 or 5 } | |
| ucomiss xmm2, xmm4 | |
| adc edx, 0 | |
| @4: divss xmm3, xmm2 { scale = SizeFactor / z } | |
| mulss xmm0, xmm3 | |
| mulss xmm1, xmm3 | |
| cvtss2si eax, xmm0 { column from x } | |
| cvtss2si ebx, xmm1 { row from y } | |
| jmp @e | |
| @1: ucomiss xmm6, xmm7 | |
| jc @2 { masked y below masked z } | |
| mov edx, 2 { y is largest: table 2 or 3 } | |
| ucomiss xmm1, xmm4 | |
| adc edx, 0 | |
| @5: divss xmm3, xmm1 { scale = SizeFactor / y } | |
| mulss xmm0, xmm3 | |
| mulss xmm2, xmm3 | |
| cvtss2si eax, xmm0 { column from x } | |
| cvtss2si ebx, xmm2 { row from z } | |
| @e: add ebx, [esi + TVolumetricLightMap.HalfSize] { shift both coordinates by HalfSize } | |
| add eax, [esi + TVolumetricLightMap.HalfSize] | |
| imul ebx, dword [esi + TVolumetricLightMap.CubeSize] | |
| mov esi, [esi + edx * 4 + TVolumetricLightMap.CubeSides] { pointer number edx in CubeSides } | |
| add eax, ebx { cell index = row * CubeSize + column } | |
| fld dword [esi + eax * 4] { Result } | |
| pop ebx | |
| pop esi | |
| end; | |
| function TCalcAmbShadowDEThreadGeneral.GetRand: Double; | |
| // Advances the object's seed (seed := seed * $343FD + $269EC3) and returns | |
| // bits 8..30 of the new seed multiplied by dm, i.e. a value in 0..1. | |
| const dm: Double = 1 / $7FFFFF; | |
| asm | |
| imul edx, [eax + seed], $343FD // eax = Self | |
| add edx, $269EC3 | |
| mov [eax + seed], edx // store the new state | |
| shr edx, 8 | |
| and edx, $7FFFFF // keep 23 bits | |
| push edx // stack slot so the FPU can load the integer | |
| fild dword [esp] | |
| fmul dm // result stays in st(0) | |
| pop edx // release the slot | |
| end; | |
| function TCalcAmbShadowDEThreadGeneral2.GetRand: Double; | |
| // Advances the object's seed (seed := seed * $343FD + $269EC3) and returns | |
| // bits 8..30 of the new seed multiplied by dm, i.e. a value in 0..1. | |
| const dm: Double = 1 / $7FFFFF; | |
| asm | |
| imul edx, [eax + seed], $343FD // eax = Self | |
| add edx, $269EC3 | |
| mov [eax + seed], edx // store the new state | |
| shr edx, 8 | |
| and edx, $7FFFFF // keep 23 bits | |
| push edx // stack slot so the FPU can load the integer | |
| fild dword [esp] | |
| fmul dm // result stays in st(0) | |
| pop edx // release the slot | |
| end; | |
| function RdTsc: int64; | |
| // Reads the processor time-stamp counter. The instruction leaves the 64-bit | |
| // count in edx:eax, the register pair used for the int64 result. | |
| asm | |
| rdtsc // same encoding as the bytes $0F $31 | |
| end; | |
| function Clamp255(i: Integer): Integer; | |
| // Caps i at 255. Values of 255 or less, negative ones included, are returned unchanged. | |
| asm | |
| cmp eax, 255 // i arrives in eax, the result leaves in eax | |
| jng @keep // not greater than 255: nothing to do | |
| mov eax, 255 | |
| @keep: | |
| end; | |
| procedure MakeCubicWeightsFromT(const t: Single; var sv: TSVec); // Fills sv with four cubic weights of t that add up to 6 (all weights 6 times bigger!): sv[0] = -t^3 + 3t^2 - 2t, sv[1] = 3t^3 - 6t^2 - 3t + 6, sv[2] = -3t^3 + 3t^2 + 6t, sv[3] = t^3 - t | |
| const s3: Single = 3; | |
| s6: Single = 6; | |
| asm | |
| fld dword [ebp + 8] { t is read from the stack frame; eax = address of sv } | |
| fld st { t, t } | |
| fmul st, st //t^2, t | |
| fld st { t^2, t^2, t } | |
| fmul st, st(2) //t^3, t^2, t | |
| fld s3 { 3, t^3, t^2, t } | |
| fmul st(2), st //3, t^3, 3t^2, t | |
| fld st(2) //3t^2, 3, t^3, 3t^2, t | |
| fsub st, st(2) //3t^2 - t^3, 3, t^3, 3t^2, t | |
| fsub st, st(4) //3t^2 - t^3 - t, 3, t^3, 3t^2, t | |
| fsub st, st(4) //3t^2 - t^3 - 2t, 3, t^3, 3t^2, t | |
| fstp dword [eax] //stored as sv[0]; stack: 3, t^3, 3t^2, t | |
| fld st(1) //t^3, 3, t^3, 3t^2, t | |
| fmul st, st(1) //3t^3, 3, t^3, 3t^2, t | |
| fsub st, st(3) //3t^3 - 3t^2, 3, t^3, 3t^2, t | |
| fsub st, st(3) //3t^3 - 6t^2, 3, t^3, 3t^2, t | |
| fld st(4) { t on top } | |
| fmul st, st(2) { 3t } | |
| fsubp { 3t^3 - 6t^2 - 3t, 3, t^3, 3t^2, t } | |
| fadd s6 { + 6 } | |
| fstp dword [eax + 4] { stored as sv[1]; stack: 3, t^3, 3t^2, t } | |
| fmul st, st(1) { 3t^3, t^3, 3t^2, t } | |
| fsubp st(2), st //t^3, 3t^2 - 3t^3, t | |
| fsub st, st(2) { t^3 - t } | |
| fstp dword [eax + 12] //stored as sv[3]; stack: 3t^2 - 3t^3, t | |
| fxch { t, 3t^2 - 3t^3 } | |
| fmul s6 { 6t } | |
| faddp { 3t^2 - 3t^3 + 6t } | |
| fstp dword [eax + 8] { stored as sv[2]; FPU stack is empty again } | |
| end; | |
| function GetCosTabVal(const Tnr: Integer; const DotP, Rough: Single): Single; { asm core shown below: weights four consecutive entries from each of two DiffCosTabNsmall positions with w, sums each group and blends the two sums by Rough; ip and w come from the elided part } | |
| // ... | |
| asm | |
| mov edx, Tnr | |
| shl edx, 7 { 128 entries per Tnr } | |
| add edx, ip | |
| lea eax, DiffCosTabNsmall + edx * 4 { address of entry Tnr * 128 + ip, 4 bytes per entry } | |
| movups xmm2, w { four weights } | |
| movups xmm0, [eax] { group A: four entries at the computed position } | |
| movups xmm1, [eax + $800] { group B: four entries $800 bytes further on } | |
| mulps xmm0, xmm2 | |
| mulps xmm1, xmm2 | |
| movaps xmm3, xmm0 | |
| unpcklps xmm3, xmm1 { a0, b0, a1, b1 } | |
| unpckhps xmm0, xmm1 { a2, b2, a3, b3 } | |
| addps xmm3, xmm0 | |
| movhlps xmm0, xmm3 | |
| addps xmm3, xmm0 { lane 0 = sum of A, lane 1 = sum of B } | |
| movaps xmm2, xmm3 | |
| shufps xmm2, xmm2, 1 { lane 0 of xmm2 = sum of B } | |
| subss xmm2, xmm3 | |
| mulss xmm2, Rough | |
| addss xmm2, xmm3 { sumA + Rough * (sumB - sumA) } | |
| movss Result, xmm2 | |
| end | |
| // ... | |
| function GetCosTabValSqr(const Tnr: Integer; const DotP, Rough: Single): Single; { asm core shown below: weights four consecutive entries from each of two DiffCosTabNsmall positions with w, sums each group, squares both sums and blends the squares by Rough; ip and w come from the elided part } | |
| // ... | |
| asm | |
| mov edx, Tnr | |
| shl edx, 7 { 128 entries per Tnr } | |
| add edx, ip | |
| lea eax, DiffCosTabNsmall + edx * 4 { address of entry Tnr * 128 + ip, 4 bytes per entry } | |
| movups xmm2, w { four weights } | |
| movups xmm0, [eax] { group A: four entries at the computed position } | |
| movups xmm1, [eax + $800] { group B: four entries $800 bytes further on } | |
| mulps xmm0, xmm2 | |
| mulps xmm1, xmm2 | |
| movaps xmm3, xmm0 | |
| unpcklps xmm3, xmm1 { a0, b0, a1, b1 } | |
| unpckhps xmm0, xmm1 { a2, b2, a3, b3 } | |
| addps xmm3, xmm0 | |
| movhlps xmm0, xmm3 | |
| addps xmm3, xmm0 { lane 0 = sum of A, lane 1 = sum of B } | |
| mulps xmm3, xmm3 { square both sums before blending } | |
| movaps xmm2, xmm3 | |
| shufps xmm2, xmm2, 1 { lane 0 of xmm2 = squared sum of B } | |
| subss xmm2, xmm3 | |
| mulss xmm2, Rough | |
| addss xmm2, xmm3 { sqrA + Rough * (sqrB - sqrA) } | |
| movss Result, xmm2 | |
| end | |
| // ... | |
| function TMCCalcThread.GetRand: Double; | |
| // Advances the object's seed (seed := seed * $343FD + $269EC3) and returns | |
| // the low 31 bits of the new seed multiplied by dSeedMul. | |
| asm //begin result := random; end; | |
| imul edx, [eax + seed], $343FD // eax = Self | |
| add edx, $269EC3 | |
| mov [eax + seed], edx // store the new state | |
| and edx, $7FFFFFFF // drop the sign bit so fild sees a non-negative integer | |
| sub esp, 4 // stack slot so the FPU can load the integer | |
| mov [esp], edx | |
| fild dword [esp] | |
| fmul dSeedMul // result stays in st(0) | |
| add esp, 4 // release the slot | |
| end; | |
| function TMCCalcThread.GenSphereSVecOm: TSVec; //fullsphere: writes (r*cos, r*sin, 1 - 2v, 0) with r = 2*Sqrt(v*(1 - v)), a point on the unit sphere, to the result | |
| asm | |
| cmp dword [eax + TMCCalcThread.bDoDOF], 0 { eax = Self } | |
| jnz @@1 | |
| fld dword [eax + TMCCalcThread.HaltonDiscY] { bDoDOF = 0: use the stored HaltonDisc pair; Y becomes v, X feeds the angle } | |
| fld dword [eax + TMCCalcThread.HaltonDiscX] | |
| jmp @@2 | |
| @@1: | |
| push edx { edx = address of the TSVec result; saved because the GetRand shown for this class changes edx } | |
| call GetRand { bDoDOF <> 0: two GetRand values are left on the FPU stack instead } | |
| call GetRand | |
| pop edx | |
| @@2: | |
| fmul PiM2 { angle = st(0) * PiM2 } | |
| fsincos //cos,sin,v | |
| fld1 | |
| fsub st, st(3) { 1 - v } | |
| fmul st, st(3) { v * (1 - v) } | |
| fsqrt | |
| fadd st, st //r,cos,sin,v with r = 2 * Sqrt(v * (1 - v)) | |
| fmul st(2), st { sin becomes r * sin } | |
| fmulp //c',s',v meaning r*cos, r*sin, v | |
| fstp dword [edx] { first component = r * cos } | |
| fstp dword [edx + 4] { second component = r * sin } | |
| fadd st, st { 2 * v } | |
| fld1 | |
| fsubrp { 1 - 2 * v } | |
| fstp dword [edx + 8] { third component } | |
| xor eax, eax | |
| mov [edx + 12], eax //} fourth component = 0 | |
| end; | |
| function ByteSwap(const a: integer): integer; { Returns a with the order of its four bytes reversed } | |
| asm | |
| bswap eax { the code treats eax as both the argument a and the result } | |
| end; | |
| function ByteSwap16(inp:word): word; | |
| // Exchanges the two bytes of inp, e.g. $1234 becomes $3412. | |
| asm | |
| xchg al, ah // swap low and high byte in place | |
| movzx eax, ax // upper half of eax cleared, exactly as bswap followed by shr 16 leaves it | |
| end; | |
| function TPngObject.RGB2Quad(RGB: pRGBPixel): TRGBQuad; | |
| // Copies the three bytes at RGB^ into the low three bytes of the result | |
| // and sets the fourth byte to zero. Only three bytes are read from RGB^. | |
| asm | |
| movzx eax, word ptr [edx] // bytes 0 and 1 (edx = RGB) | |
| movzx ecx, byte ptr [edx + 2] // byte 2 | |
| shl ecx, 16 | |
| or eax, ecx // byte 3 stays zero | |
| end; | |
| function ReturnAddr: Pointer; { Returns the dword stored at EBP + 4; assumes EBP still holds the caller's frame pointer, so that slot is the caller's return address - confirm against the callers' stack-frame settings } | |
| asm | |
| MOV EAX,[EBP+4] // sysutils.pas says [EBP-4], but this works ! | |
| end; | |
| function CompareMem(P1, P2: Pointer; Length: Integer): Boolean; assembler; | |
| // Returns True when the first Length bytes at P1 and P2 are equal | |
| // (also True for Length = 0). Whole dwords are compared first, then | |
| // the remaining 0..3 bytes. Length is expected in ECX. | |
| asm | |
| PUSH ESI | |
| PUSH EDI | |
| MOV ESI,P1 | |
| MOV EDI,P2 | |
| MOV EDX,ECX | |
| AND EDX,3 // EDX = trailing byte count | |
| XOR EAX,EAX // Result := False; placed before SHR so ZF below comes from the shift | |
| SHR ECX,2 // ECX = dword count; ZF=1 when zero, so a skipped REPE falls through | |
| REPE CMPSD | |
| JNE @@Done | |
| MOV ECX,EDX | |
| REPE CMPSB | |
| JNE @@Done | |
| INC EAX // every byte matched: Result := True | |
| @@Done: POP EDI | |
| POP ESI | |
| end; | |
| GetSystemInfo(SysInfo); | |
| asm | |
| MOV EDX, Colors { for each of the Count dwords at Colors: exchange bytes 0 and 2 and zero byte 3 } | |
| MOV ECX, Count | |
| DEC ECX { ECX = index of the last entry; the loops run down to 0 } | |
| JS @@END { Count <= 0: nothing to do } | |
| LEA EAX, SysInfo | |
| CMP [EAX].TSystemInfo.wProcessorLevel, 3 | |
| JE @@386 { processor level 3: take the path that avoids BSWAP } | |
| @@1: MOV EAX, [EDX+ECX*4] | |
| BSWAP EAX { reverse all four bytes } | |
| SHR EAX,8 { drop the former byte 3, top byte becomes 0 } | |
| MOV [EDX+ECX*4],EAX | |
| DEC ECX | |
| JNS @@1 | |
| JMP @@END | |
| @@386: | |
| PUSH EBX | |
| @@2: XOR EBX,EBX | |
| MOV EAX, [EDX+ECX*4] | |
| MOV BH, AL { EBX = 00 00 b0 00 } | |
| MOV BL, AH { EBX = 00 00 b0 b1 } | |
| SHR EAX,16 { AL = b2 } | |
| SHL EBX,8 { EBX = 00 b0 b1 00 } | |
| MOV BL, AL { EBX = 00 b0 b1 b2, same result as the BSWAP path } | |
| MOV [EDX+ECX*4],EBX | |
| DEC ECX | |
| JNS @@2 | |
| POP EBX | |
| @@END: | |
| end; | |
| function Scan(Buf: PAnsiChar; Value: Byte; Count: integer): boolean; assembler; | |
| // Returns True when the byte Value occurs in the first Count bytes at Buf. | |
| // Returns False for Count <= 0. REPNE SCASB performs no iteration when | |
| // ECX is 0 and then leaves the flags untouched, so without the explicit | |
| // guard the result would depend on whatever flags the caller left behind. | |
| asm | |
| PUSH EDI | |
| MOV EDI, Buf | |
| MOV ECX, Count | |
| MOV AL, Value | |
| TEST ECX, ECX | |
| JLE @@0 // empty or negative range: not found | |
| REPNE SCASB | |
| JNE @@0 // ran out of bytes without a match | |
| MOV EAX, True | |
| JMP @@1 | |
| @@0:MOV EAX, False | |
| @@1:POP EDI | |
| end; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment