Skip to content

Instantly share code, notes, and snippets.

@djg
Created July 8, 2010 00:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save djg/467475 to your computer and use it in GitHub Desktop.
Save djg/467475 to your computer and use it in GitHub Desktop.
// One of many test.cpp files I found. I think this one was for examining the generated assembly.
#include "xmmintrin.h"
#if 0
// Scalar variant: three independent floats with component-wise
// multiply and add.
struct Vec3
{
    float x, y, z;

    // Leaves x, y and z uninitialized.
    __forceinline Vec3() {}

    // Builds a vector from its three components.
    __forceinline Vec3(float X, float Y, float Z)
        : x(X), y(Y), z(Z)
    {
    }

    // Component-wise product of *this and rhs.
    __forceinline Vec3 operator*(const Vec3& rhs) const
    {
        return Vec3(x * rhs.x, y * rhs.y, z * rhs.z);
    }

    // Component-wise sum of *this and rhs.
    __forceinline Vec3 operator+(const Vec3& rhs) const
    {
        return Vec3(x + rhs.x, y + rhs.y, z + rhs.z);
    }
};
// Argument convention for the scalar variant: vectors are passed by const reference.
typedef const Vec3& Vec3Arg;
// Result convention for the scalar variant: vectors are returned by value.
typedef Vec3 Vec3Res;
#endif
#if 1
// SSE variant: the vector is held in a single __m128, so operator* and
// operator+ each forward to one packed intrinsic (_mm_mul_ps / _mm_add_ps).
struct Vec3
{
    // union
    // {
    __m128 vMM;
    //     struct { float x, y, z; };
    // };

    // Leaves vMM uninitialized.
    __forceinline Vec3() {}

    // __forceinline Vec3(float X, float Y, float Z) : x(X), y(Y), z(Z) {}

    // Wraps an existing __m128 value.
    __forceinline Vec3(__m128 t) : vMM(t) {}

    // Copy operations take const references so that const objects and
    // temporaries (such as the result of operator*) can be copied without
    // relying on the compiler extension that binds an rvalue to a non-const
    // reference. Non-const lvalues are still accepted, so existing callers
    // are unaffected.
    __forceinline Vec3(const Vec3& v) : vMM(v.vMM) {}
    __forceinline Vec3& operator=(const Vec3& v)
    {
        vMM = v.vMM;
        return *this;
    }

    // Lane-wise product of *this and t.
    __forceinline Vec3 operator*(const Vec3& t) const
    {
        return Vec3(_mm_mul_ps(vMM, t.vMM));
    }

    // Lane-wise sum of *this and t.
    __forceinline Vec3 operator+(const Vec3& t) const
    {
        return Vec3(_mm_add_ps(vMM, t.vMM));
    }
};
// Argument convention for the SSE struct variant: a const Vec3 passed by value.
// NOTE(review): the second object dump at the end of this file shows MulAdd
// taking `struct Vec3 const &` parameters, so it was presumably built with a
// reference typedef here -- confirm which form is intended.
typedef const Vec3 Vec3Arg;
// Result convention for the SSE struct variant: a const Vec3 returned by value.
typedef const Vec3 Vec3Res;
#endif
#if 0
// Raw-intrinsic variant: Vec3 is the __m128 type itself, and arguments and
// results are passed by value.
typedef __m128 Vec3;
typedef __m128 Vec3Arg;
typedef __m128 Vec3Res;

// Lane-wise sum of lhs and rhs.
__forceinline Vec3Res operator+(Vec3Arg lhs, Vec3Arg rhs)
{
    return _mm_add_ps(lhs, rhs);
}

// Lane-wise product of lhs and rhs.
__forceinline Vec3Res operator*(Vec3Arg lhs, Vec3Arg rhs)
{
    return _mm_mul_ps(lhs, rhs);
}
#endif
// Returns a * b + c, using the operators of whichever Vec3 variant is enabled
// above. Declared noinline so the function is emitted as its own body.
__declspec(noinline) Vec3Res MulAdd(Vec3Arg a, Vec3Arg b, Vec3Arg c)
{
    // Bind the product to a const reference rather than copying it.
    const Vec3& product = a * b;
    return product + c;
}
// Dump of file test.obj
// File Type: COFF OBJECT
// Variant 1 -- scalar struct Vec3 (float x, y, z), arguments by const reference:
// ?MulAdd@@YA?AUVec3@@ABU1@00@Z (struct Vec3 __cdecl MulAdd(struct Vec3 const &,struct Vec3 const &,struct Vec3 const &)):
// 00000000: 8B 44 24 08 mov eax,dword ptr [esp+8]
// 00000004: 8B 4C 24 0C mov ecx,dword ptr [esp+0Ch]
// 00000008: F3 0F 10 00 movss xmm0,dword ptr [eax]
// 0000000C: F3 0F 10 09 movss xmm1,dword ptr [ecx]
// 00000010: F3 0F 10 51 04 movss xmm2,dword ptr [ecx+4]
// 00000015: F3 0F 10 59 08 movss xmm3,dword ptr [ecx+8]
// 0000001A: 8B 4C 24 10 mov ecx,dword ptr [esp+10h]
// 0000001E: 0F 5A C0 cvtps2pd xmm0,xmm0
// 00000021: 0F 5A C9 cvtps2pd xmm1,xmm1
// 00000024: F2 0F 59 C1 mulsd xmm0,xmm1
// 00000028: F3 0F 10 48 04 movss xmm1,dword ptr [eax+4]
// 0000002D: 66 0F 5A C0 cvtpd2ps xmm0,xmm0
// 00000031: 0F 5A D2 cvtps2pd xmm2,xmm2
// 00000034: 0F 5A C9 cvtps2pd xmm1,xmm1
// 00000037: F2 0F 59 CA mulsd xmm1,xmm2
// 0000003B: F3 0F 10 50 08 movss xmm2,dword ptr [eax+8]
// 00000040: 8B 44 24 04 mov eax,dword ptr [esp+4]
// 00000044: F3 0F 5A C0 cvtss2sd xmm0,xmm0
// 00000048: 0F 5A DB cvtps2pd xmm3,xmm3
// 0000004B: 0F 5A D2 cvtps2pd xmm2,xmm2
// 0000004E: F2 0F 59 D3 mulsd xmm2,xmm3
// 00000052: F3 0F 10 19 movss xmm3,dword ptr [ecx]
// 00000056: 0F 5A DB cvtps2pd xmm3,xmm3
// 00000059: F2 0F 58 D8 addsd xmm3,xmm0
// 0000005D: 66 0F 5A C3 cvtpd2ps xmm0,xmm3
// 00000061: F3 0F 11 00 movss dword ptr [eax],xmm0
// 00000065: F3 0F 10 41 04 movss xmm0,dword ptr [ecx+4]
// 0000006A: 0F 5A C0 cvtps2pd xmm0,xmm0
// 0000006D: 66 0F 5A C9 cvtpd2ps xmm1,xmm1
// 00000071: F3 0F 5A C9 cvtss2sd xmm1,xmm1
// 00000075: F2 0F 58 C1 addsd xmm0,xmm1
// 00000079: 66 0F 5A C0 cvtpd2ps xmm0,xmm0
// 0000007D: F3 0F 11 40 04 movss dword ptr [eax+4],xmm0
// 00000082: F3 0F 10 41 08 movss xmm0,dword ptr [ecx+8]
// 00000087: 66 0F 5A D2 cvtpd2ps xmm2,xmm2
// 0000008B: 0F 5A C0 cvtps2pd xmm0,xmm0
// 0000008E: F3 0F 5A CA cvtss2sd xmm1,xmm2
// 00000092: F2 0F 58 C1 addsd xmm0,xmm1
// 00000096: 66 0F 5A C0 cvtpd2ps xmm0,xmm0
// 0000009A: F3 0F 11 40 08 movss dword ptr [eax+8],xmm0
// 0000009F: C3 ret
// File Type: COFF OBJECT
// Variant 2 -- struct Vec3 wrapping __m128, arguments by const reference, const result:
// ?MulAdd@@YA?BUVec3@@ABU1@00@Z (struct Vec3 const __cdecl MulAdd(struct Vec3 const &,struct Vec3 const &,struct Vec3 const &)):
// 00000000: 55 push ebp
// 00000001: 8B EC mov ebp,esp
// 00000003: 83 E4 F0 and esp,0FFFFFFF0h
// 00000006: 8B 4D 0C mov ecx,dword ptr [ebp+0Ch]
// 00000009: 0F 28 01 movaps xmm0,xmmword ptr [ecx]
// 0000000C: 8B 55 10 mov edx,dword ptr [ebp+10h]
// 0000000F: 0F 28 0A movaps xmm1,xmmword ptr [edx]
// 00000012: 8B 4D 14 mov ecx,dword ptr [ebp+14h]
// 00000015: 8B 45 08 mov eax,dword ptr [ebp+8]
// 00000018: 0F 59 C1 mulps xmm0,xmm1
// 0000001B: 0F 28 09 movaps xmm1,xmmword ptr [ecx]
// 0000001E: 0F 58 C1 addps xmm0,xmm1
// 00000021: 0F 29 00 movaps xmmword ptr [eax],xmm0
// 00000024: 8B E5 mov esp,ebp
// 00000026: 5D pop ebp
// 00000027: C3 ret
// File Type: COFF OBJECT
// Variant 3 -- Vec3 typedef'd directly to __m128, arguments and result by value:
// ?MulAdd@@YA?AT__m128@@T1@00@Z (union __m128 __cdecl MulAdd(union __m128,union __m128,union __m128)):
// 00000000: 0F 59 C1 mulps xmm0,xmm1
// 00000003: 0F 58 C2 addps xmm0,xmm2
// 00000006: C3 ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment