Last active
November 6, 2021 00:36
-
-
Save RealNeGate/1fdd3217ec68024515c672c7f1d30847 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <x86intrin.h> | |
// Four triangles stored structure-of-arrays: each __m128 holds one
// coordinate of one vertex for all four triangles, so lane i of every
// field belongs to triangle i.
typedef struct TriangleX4 {
    // Vertex a
    __m128 ax, ay, az;
    // Vertex b
    __m128 bx, by, bz;
    // Vertex c
    __m128 cx, cy, cz;
} TriangleX4;
// Four normals stored structure-of-arrays: lane i of nx/ny/nz is the
// x/y/z component of the i-th normal.
typedef struct TriangleNormalX4 {
    __m128 nx, ny, nz;
} TriangleNormalX4;
// Computes the face normals of four triangles at once (one per SIMD lane).
//
// tri: vertices a, b, c of the four triangles.
// out: receives n = normalize((b - a) x (c - a)) for each triangle.
//
// Normalization uses _mm_rsqrt_ps, which is an approximation of
// 1/sqrt(x), so the results are only approximately unit length.
void gen_normals(const TriangleX4* tri, TriangleNormalX4* out) {
    // Edge vectors leaving vertex a: u = b - a, v = c - a.
    // The normal must be built from edges; crossing the raw vertex
    // positions is only correct for a triangle with a corner at the origin.
    __m128 ux = _mm_sub_ps(tri->bx, tri->ax);
    __m128 uy = _mm_sub_ps(tri->by, tri->ay);
    __m128 uz = _mm_sub_ps(tri->bz, tri->az);

    __m128 vx = _mm_sub_ps(tri->cx, tri->ax);
    __m128 vy = _mm_sub_ps(tri->cy, tri->ay);
    __m128 vz = _mm_sub_ps(tri->cz, tri->az);

    // Cross product n = u x v.
    // u.y * v.z - u.z * v.y
    __m128 nx = _mm_sub_ps(_mm_mul_ps(uy, vz), _mm_mul_ps(uz, vy));
    // u.z * v.x - u.x * v.z
    __m128 ny = _mm_sub_ps(_mm_mul_ps(uz, vx), _mm_mul_ps(ux, vz));
    // u.x * v.y - u.y * v.x
    __m128 nz = _mm_sub_ps(_mm_mul_ps(ux, vy), _mm_mul_ps(uy, vx));

    // 1 / sqrt((nx * nx) + (ny * ny) + (nz * nz))
    __m128 length_sq = _mm_add_ps(
        _mm_add_ps(_mm_mul_ps(nx, nx), _mm_mul_ps(ny, ny)),
        _mm_mul_ps(nz, nz));
    __m128 inv_length = _mm_rsqrt_ps(length_sq);

    // NOTE(NeGate): a degenerate (zero-area) triangle has a zero cross
    // product, and its lane will come out as NaN.
    out->nx = _mm_mul_ps(nx, inv_length);
    out->ny = _mm_mul_ps(ny, inv_length);
    out->nz = _mm_mul_ps(nz, inv_length);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gen_normals: reads six 16-byte vectors from [rdi+0] .. [rdi+80], forms the
# lane-wise cross product of the first three with the last three, scales it by
# an approximate reciprocal square root of its squared length, and writes three
# 16-byte vectors to [rsi+0] .. [rsi+32]. Nothing at [rdi+96] or above is read.
# NOTE(review): rdi/rsi look like the first two pointer arguments under the
# System V AMD64 calling convention — confirm against the build target.
gen_normals: | |
# Aligned loads (vmovaps): rdi must be 16-byte aligned.
vmovaps xmm2, XMMWORD PTR [rdi+80] | |
vmovaps xmm1, XMMWORD PTR [rdi+16] | |
vmovaps xmm0, XMMWORD PTR [rdi+64] | |
# xmm3 = [rdi+80] * [rdi+16]
vmulps xmm3, xmm2, xmm1 | |
vmovaps xmm4, XMMWORD PTR [rdi+32] | |
vmovaps xmm6, XMMWORD PTR [rdi] | |
vmovaps xmm5, XMMWORD PTR [rdi+48] | |
# xmm3 = xmm3 - [rdi+64] * [rdi+32]   (first cross-product component)
vfnmadd231ps xmm3, xmm0, xmm4 | |
# xmm0 = [rdi+64] * [rdi+0]
vmulps xmm0, xmm0, xmm6 | |
# xmm4 = [rdi+32] * [rdi+48]
vmulps xmm4, xmm4, xmm5 | |
# xmm1 = xmm0 - [rdi+16] * [rdi+48]   (third cross-product component)
vfnmadd132ps xmm1, xmm0, xmm5 | |
# xmm2 = xmm4 - [rdi+80] * [rdi+0]    (second cross-product component)
vfnmadd132ps xmm2, xmm4, xmm6 | |
# xmm0 = xmm3^2 + xmm2^2 + xmm1^2     (squared length, via fused multiply-adds)
vmulps xmm0, xmm3, xmm3 | |
vfmadd231ps xmm0, xmm2, xmm2 | |
vfmadd231ps xmm0, xmm1, xmm1 | |
# xmm0 = approximate 1/sqrt(xmm0)
vrsqrtps xmm0, xmm0 | |
# Scale all three components by the reciprocal length.
vmulps xmm3, xmm3, xmm0 | |
vmulps xmm2, xmm2, xmm0 | |
vmulps xmm1, xmm1, xmm0 | |
# Aligned stores: first, second, third component to [rsi+0], [rsi+16], [rsi+32].
vmovaps XMMWORD PTR [rsi], xmm3 | |
vmovaps XMMWORD PTR [rsi+16], xmm2 | |
vmovaps XMMWORD PTR [rsi+32], xmm1 | |
ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment