Skip to content

Instantly share code, notes, and snippets.

View justinruggles's full-sized avatar

Justin Ruggles justinruggles

View GitHub Profile
@justinruggles
justinruggles / gist:3139463
Created July 18, 2012 22:44
ff_conv_s16p_to_flt_6ch
SECTION_RODATA 32
pb_shuf_unpack_even: db 11, 10, -1, -1, 9, 8, -1, -1, 3, 2, -1, -1, 1, 0, -1, -1
pb_shuf_unpack_odd: db 15, 14, -1, -1, 13, 12, -1, -1, 7, 6, -1, -1, 5, 4, -1, -1
SECTION_TEXT
;------------------------------------------------------------------------------
; void ff_conv_s16p_to_flt_6ch(float *dst, int16_t *const *src, int len,
;                              int channels);
pb_perm_unpackw_0: db 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27
pb_perm_unpackw_1: db 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
[...]
punpcklwd m0, m1 ; m0 = 0, 1, 6, 7, 12, 13, 18, 19
punpcklwd m2, m3 ; m2 = 4, 5, 10, 11, 16, 17, 22, 23
punpcklwd m4, m5 ; m4 = 2, 3, 8, 9, 14, 15, 20, 21
%if cpuflag(xop)
vpperm m1, m0, m4, [pb_perm_unpackw_0] ; m1 = 0, 1, 2, 3, 12, 13, 14, 15
From ac7c4b9b628f14f9bd8f457a9d15b0fee31e44bb Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 20 Jul 2012 15:24:40 -0400
Subject: [PATCH 1/4] dsputil: x86: convert PMINSD, PMAXSD, and CLIPD macros to use cpuflags
---
libavcodec/x86/dsputil_mmx.c | 6 ++--
libavcodec/x86/dsputil_yasm.asm | 67 +++++++++++++++++++--------------------
libavutil/x86/x86util.asm | 34 ++++++++++++-------
make tests/tiny_psnr
float mono test:
avconv -cpuflags 0 -i <input> -filter:a aformat=sample_formats\=flt -channel_layout mono -y out_flt_mono_c.wav
avconv -cpuflags fma4 -i <input> -filter:a aformat=sample_formats\=flt -channel_layout mono -y out_flt_mono_fma4.wav
./tests/tiny_psnr out_flt_mono_c.wav out_flt_mono_fma4.wav
float stereo test:
avconv -cpuflags 0 -i <input> -filter:a aformat=sample_formats\=flt -channel_layout stereo -y out_flt_stereo_c.wav
avconv -cpuflags fma4 -i <input> -filter:a aformat=sample_formats\=flt -channel_layout stereo -y out_flt_stereo_fma4.wav
time of s->imdct_512.imdct_half() in ac3dec
C: 38800
3dnow: 7500
3dnow2: 6700
SSE: 3700
time of mdct in ac3enc
C: 41941
3dnow: 25469
3dnow2: 24576
TEST fft-4
make: *** [fate-fft-4] Error 1
TEST ifft-4
make: *** [fate-ifft-4] Error 1
TEST mdct-4
TEST imdct-4
TEST rdft-4
TEST irdft-4
TEST dct1d-4
TEST idct1d-4
From 926c0121c9fd3b7234f175648db87bdb6e3c15e5 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 20 Jul 2012 19:53:40 -0400
Subject: [PATCH] dsputil: x86: use cpuflags for apply_window_int16()
---
libavcodec/x86/dsputil_mmx.c | 12 +++---
libavcodec/x86/dsputil_yasm.asm | 79 +++++++++++++++++---------------------
2 files changed, 41 insertions(+), 50 deletions(-)
Program received signal SIGSEGV, Segmentation fault.
ff_imdct_half_3dnow.pre ()
at /home/justin/src/libav2/libavcodec/x86/fft_mmx.asm:1071
1071 DECL_IMDCT POSROTATESHUF_3DNOW
(gdb) disp/i $pc
1: x/i $pc
=> 0x85dfa1a <ff_imdct_half_3dnow.pre+67>: movq %mm0,0x0(%ebp)
(gdb) info all-registers
eax 0x8ee2940 149825856
ecx 0x8ee8d00 149851392
Dump of assembler code for function ff_imdct_half_3dnow.pre:
0x085df9d7 <+0>: mov 0x4(%esp),%ebp
0x085df9db <+4>: movd -0x4(%edx,%esi,4),%mm0
0x085df9e0 <+9>: movd (%edi,%ebx,2),%mm1
0x085df9e4 <+13>: punpckldq (%edx,%ebx,4),%mm0
0x085df9e8 <+17>: punpckldq 0x0(%ebp,%ebx,2),%mm1
0x085df9ed <+22>: movq %mm0,%mm2
0x085df9f0 <+25>: movq %mm1,%mm3
0x085df9f3 <+28>: psrlq $0x20,%mm3
0x085df9f7 <+32>: punpckldq %mm1,%mm3
Dump of assembler code for function ff_imdct_half_3dnow.pre:
0x085df9e7 <+0>: mov 0x4(%esp),%ebp
0x085df9eb <+4>: movd (%edx,%ebx,4),%mm0
0x085df9ef <+8>: movd -0x4(%edi,%esi,4),%mm1
0x085df9f4 <+13>: punpckldq -0x4(%edx,%esi,4),%mm0
0x085df9f9 <+18>: punpckldq -0x4(%ebp,%esi,4),%mm1
0x085df9fe <+23>: movq %mm0,%mm2
0x085dfa01 <+26>: movq %mm1,%mm3
0x085dfa04 <+29>: psrlq $0x20,%mm3
0x085dfa08 <+33>: punpckldq %mm1,%mm3