Skip to content

Instantly share code, notes, and snippets.

View bzoli22's full-sized avatar

Zoltan Barczikay bzoli22

  • Ericsson
  • Budapest, Hungary
View GitHub Profile
/*
 * Vector: an array of 1024 floats whose type requests 16-byte alignment
 * through the GCC `aligned` attribute.
 */
typedef __attribute__((aligned(16))) float Vector[1024];

/*
 * Element-wise sum: a[k] = b[k] + c[k] for every k in [0, 1024).
 *
 * As function parameters the Vector arguments are adjusted to pointers to
 * float, so the caller supplies the storage; elements are processed in
 * ascending index order.
 */
void add(Vector a, Vector b, Vector c)
{
    /* Element count taken from the typedef rather than repeated literally. */
    const int count = (int)(sizeof(Vector) / sizeof(float));
    int idx = 0;

    while (idx < count) {
        a[idx] = b[idx] + c[idx];
        ++idx;
    }
}
@bzoli22
bzoli22 / vectorization-4.s
Created January 14, 2009 21:24
automatic vectorization depending on alignment of the input
# _add: 32-bit x86 (AT&T syntax) listing that takes three pointer-sized stack
# arguments and runs a scalar single-precision add loop over 1024 elements.
# NOTE(review): this listing looks like an excerpt -- the branch that consumes
# the `testb` flags, the initialisation of %eax and the epilogue/ret are not
# visible here.
_add:
pushl %ebp                      # save caller's frame pointer
movl %esp, %ebp                 # establish this function's frame
pushl %esi                      # save registers used below
pushl %ebx
movl 8(%ebp), %ebx              # first stack argument (store base; presumably `a`)
movl 12(%ebp), %ecx             # second stack argument (first load base; presumably `b`)
movl 16(%ebp), %edx             # third stack argument (second load base; presumably `c`)
leal 16(%ebx), %esi             # %esi = %ebx + 16; its use is not shown in this excerpt
testb $15, %bl                  # low 4 address bits of %ebx: zero iff it is a multiple of 16
# Scalar loop: one float per iteration, %eax is the element index.
L7:
movss (%ecx,%eax,4), %xmm0      # load the float at %ecx + 4*%eax
addss (%edx,%eax,4), %xmm0      # add the float at %edx + 4*%eax
movss %xmm0, (%ebx,%eax,4)      # store the sum at %ebx + 4*%eax
incl %eax                       # next element
cmpl $1024, %eax                # loop until the index reaches 1024
jne L7
/*
 * Element-wise sum of two 1024-element float arrays: a[k] = b[k] + c[k].
 *
 * The array-typed parameters are ordinary float pointers inside the
 * function; the caller must provide at least 1024 elements behind each.
 * Elements are handled front to back.
 */
void add( float a[1024], float b[1024], float c[1024])
{
    float *dst = a;
    const float *lhs = b;
    const float *rhs = c;
    const float *const stop = a + 1024;   /* one past the last output slot */

    while (dst != stop) {
        *dst++ = *lhs++ + *rhs++;
    }
}
# _add: 32-bit x86 (AT&T syntax) listing that adds the global arrays _b and _c
# into _a four packed floats (16 bytes) at a time, addressing them directly by
# symbol plus a byte offset in %eax.
# NOTE(review): this listing looks like an excerpt -- the loop's compare/branch
# and the epilogue/ret are not visible here.
_add:
pushl %ebp                      # save caller's frame pointer
movl %esp, %ebp                 # establish this function's frame
xorl %eax, %eax                 # byte offset into the arrays starts at 0
.p2align 2,,3                   # align the loop head to 4 bytes, padding at most 3 bytes
L2:
movaps _b(%eax), %xmm0          # aligned 16-byte load of four floats from _b + %eax
addps _c(%eax), %xmm0           # packed add of the four floats at _c + %eax
movaps %xmm0, _a(%eax)          # aligned 16-byte store of the four sums to _a + %eax
addl $16, %eax                  # advance by one 16-byte group
/* File-local operands (b, c) and destination (a), 1024 floats each. */
static float a[1024], b[1024], c[1024];

/*
 * Store the element-wise sum of the file-local arrays b and c into a:
 * a[i] = b[i] + c[i] for every index of a.
 *
 * Takes no arguments. The parameter list is spelled `(void)` so that this
 * definition also acts as a prototype and calls that pass arguments are
 * diagnosed by the compiler.
 */
void add(void)
{
    /* Loop bound derived from the array itself instead of a repeated literal. */
    const int count = (int)(sizeof a / sizeof a[0]);

    for (int i = 0; i < count; i++) {
        a[i] = b[i] + c[i];
    }
}
# _create_vector: 32-bit x86 (AT&T syntax) listing that packs the four
# single-precision values found at 4/8/12/16(%esp) into %xmm0 in that order
# and returns. No stack frame is set up, so the values are read relative to
# %esp (presumably the four float arguments of the C create_vector).
_create_vector:
movss 8(%esp), %xmm1            # %xmm1 low lane = second value
movss 4(%esp), %xmm0            # %xmm0 low lane = first value
unpcklps %xmm1, %xmm0           # interleave low lanes: %xmm0 = {first, second, ...}
movss 16(%esp), %xmm2           # %xmm2 low lane = fourth value
movss 12(%esp), %xmm1           # %xmm1 low lane = third value
unpcklps %xmm2, %xmm1           # interleave low lanes: %xmm1 = {third, fourth, ...}
movlhps %xmm1, %xmm0            # low half of %xmm1 -> high half of %xmm0: {first, second, third, fourth}
ret                             # packed result is left in %xmm0
# _create_vector (second variant): 32-bit x86 (AT&T syntax) listing that builds
# the same two value pairs from 4/8/12/16(%esp) but shuffles them through extra
# register-to-register copies.
# NOTE(review): this listing looks like an excerpt -- the instruction that
# merges the two halves and the final ret are not visible here.
_create_vector:
movss 8(%esp), %xmm0            # %xmm0 low lane = second value
movss 4(%esp), %xmm2            # %xmm2 low lane = first value
unpcklps %xmm0, %xmm2           # interleave low lanes: %xmm2 = {first, second, ...}
movaps %xmm2, %xmm1             # keep the {first, second} pair in %xmm1
movss 16(%esp), %xmm0           # %xmm0 low lane = fourth value
movss 12(%esp), %xmm2           # %xmm2 low lane = third value
unpcklps %xmm0, %xmm2           # interleave low lanes: %xmm2 = {third, fourth, ...}
movaps %xmm2, %xmm0             # %xmm0 = {third, fourth, ...}
movaps %xmm1, %xmm2             # %xmm2 = {first, second, ...}
/*
 * v4t: GCC vector-extension type of 16 bytes (four floats) with 16-byte
 * alignment requested.
 */
typedef float v4t __attribute__((vector_size(16), aligned(16)));

/*
 * Build a v4t whose lanes, in order, are a, b, c and d.
 */
v4t create_vector(float a, float b, float c, float d)
{
    return (v4t){a, b, c, d};
}
/*
 * v3t: vector-extension type declared with a 12-byte vector size (three
 * floats) and 16-byte alignment requested.
 * NOTE(review): GCC documents vector_size as requiring a power-of-two
 * multiple of the element size, and 12 bytes is not one -- confirm the
 * target compiler accepts this declaration.
 */
typedef float v3t __attribute__ ((vector_size(12), aligned(16)));
/*
 * Return the lane-wise sum of the two vectors a and b, using the vector
 * extension's built-in `+` operator.
 */
v3t addv3( v3t a, v3t b)
{
return a + b;
}